/*
 * QEMU System Emulator block driver
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
#ifndef BLOCK_INT_H
#define BLOCK_INT_H

27
#include "block/accounting.h"
28
#include "block/block.h"
M
Max Reitz 已提交
29
#include "block/throttle-groups.h"
30 31
#include "qemu/option.h"
#include "qemu/queue.h"
32
#include "qemu/coroutine.h"
33
#include "qemu/timer.h"
L
Luiz Capitulino 已提交
34
#include "qapi-types.h"
35
#include "qemu/hbitmap.h"
36
#include "block/snapshot.h"
37
#include "qemu/main-loop.h"
38
#include "qemu/throttle.h"
P
pbrook 已提交
39

40 41
/* Flag bits; each value is a distinct power of two so they can be OR-ed.
 * NOTE(review): where these flags are consumed is not visible here. */
#define BLOCK_FLAG_ENCRYPT          1
#define BLOCK_FLAG_LAZY_REFCOUNTS   8

/* Option-name strings.
 * NOTE(review): presumably the keys used in image creation/amend option
 * lists (cf. BlockDriver.create_opts) -- confirm against the drivers. */
#define BLOCK_OPT_SIZE              "size"
#define BLOCK_OPT_ENCRYPT           "encryption"
#define BLOCK_OPT_COMPAT6           "compat6"
#define BLOCK_OPT_HWVERSION         "hwversion"
#define BLOCK_OPT_BACKING_FILE      "backing_file"
#define BLOCK_OPT_BACKING_FMT       "backing_fmt"
#define BLOCK_OPT_CLUSTER_SIZE      "cluster_size"
#define BLOCK_OPT_TABLE_SIZE        "table_size"
#define BLOCK_OPT_PREALLOC          "preallocation"
#define BLOCK_OPT_SUBFMT            "subformat"
#define BLOCK_OPT_COMPAT_LEVEL      "compat"
#define BLOCK_OPT_LAZY_REFCOUNTS    "lazy_refcounts"
#define BLOCK_OPT_ADAPTER_TYPE      "adapter_type"
#define BLOCK_OPT_REDUNDANCY        "redundancy"
#define BLOCK_OPT_NOCOW             "nocow"
#define BLOCK_OPT_OBJECT_SIZE       "object_size"
#define BLOCK_OPT_REFCOUNT_BITS     "refcount_bits"

/* NOTE(review): presumably the size in bytes of the buffer handed to the
 * .bdrv_probe() callbacks -- confirm against the probing code. */
#define BLOCK_PROBE_BUF_SIZE        512

63 64 65 66 67 68 69 70
/* Kind of operation a BdrvTrackedRequest stands for; stored in the
 * request's 'type' member. */
enum BdrvTrackedRequestType {
    BDRV_TRACKED_READ = 0,
    BDRV_TRACKED_WRITE,
    BDRV_TRACKED_FLUSH,
    BDRV_TRACKED_IOCTL,
    BDRV_TRACKED_DISCARD,
};

71 72
typedef struct BdrvTrackedRequest {
    BlockDriverState *bs;
73 74
    int64_t offset;
    unsigned int bytes;
75
    enum BdrvTrackedRequestType type;
76

77
    bool serialising;
78 79 80
    int64_t overlap_offset;
    unsigned int overlap_bytes;

81 82 83
    QLIST_ENTRY(BdrvTrackedRequest) list;
    Coroutine *co; /* owner, used for deadlock detection */
    CoQueue wait_queue; /* coroutines blocked on this request */
84 85

    struct BdrvTrackedRequest *waiting_for;
86 87
} BdrvTrackedRequest;

B
bellard 已提交
88 89 90
struct BlockDriver {
    const char *format_name;
    int instance_size;
91

92 93 94 95
    /* set to true if the BlockDriver is a block filter */
    bool is_filter;
    /* for snapshots block filter like Quorum can implement the
     * following recursive callback.
96 97 98
     * It's purpose is to recurse on the filter children while calling
     * bdrv_recurse_is_first_non_filter on them.
     * For a sample implementation look in the future Quorum block filter.
99
     */
100 101
    bool (*bdrv_recurse_is_first_non_filter)(BlockDriverState *bs,
                                             BlockDriverState *candidate);
102

B
bellard 已提交
103
    int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
104
    int (*bdrv_probe_device)(const char *filename);
105 106 107

    /* Any driver implementing this callback is expected to be able to handle
     * NULL file names in its .bdrv_open() implementation */
108
    void (*bdrv_parse_filename)(const char *filename, QDict *options, Error **errp);
109 110 111 112 113 114
    /* Drivers not implementing bdrv_parse_filename nor bdrv_open should have
     * this field set to true, except ones that are defined only by their
     * child's bs.
     * An example of the last type will be the quorum block driver.
     */
    bool bdrv_needs_filename;
115

116 117 118
    /* Set if a driver can support backing files */
    bool supports_backing;

119 120 121 122 123
    /* For handling image reopen for split or non-split files */
    int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state,
                               BlockReopenQueue *queue, Error **errp);
    void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state);
    void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state);
124
    void (*bdrv_join_options)(QDict *options, QDict *old_options);
125

M
Max Reitz 已提交
126 127 128 129
    int (*bdrv_open)(BlockDriverState *bs, QDict *options, int flags,
                     Error **errp);
    int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags,
                          Error **errp);
B
bellard 已提交
130
    void (*bdrv_close)(BlockDriverState *bs);
C
Chunyan Liu 已提交
131
    int (*bdrv_create)(const char *filename, QemuOpts *opts, Error **errp);
B
bellard 已提交
132
    int (*bdrv_set_key)(BlockDriverState *bs, const char *key);
133
    int (*bdrv_make_empty)(BlockDriverState *bs);
M
Max Reitz 已提交
134

135
    void (*bdrv_refresh_filename)(BlockDriverState *bs, QDict *options);
M
Max Reitz 已提交
136

B
bellard 已提交
137
    /* aio */
138
    BlockAIOCB *(*bdrv_aio_readv)(BlockDriverState *bs,
139
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
140
        BlockCompletionFunc *cb, void *opaque);
141
    BlockAIOCB *(*bdrv_aio_writev)(BlockDriverState *bs,
142
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
143
        BlockCompletionFunc *cb, void *opaque);
144
    BlockAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs,
145
        BlockCompletionFunc *cb, void *opaque);
146
    BlockAIOCB *(*bdrv_aio_discard)(BlockDriverState *bs,
147
        int64_t sector_num, int nb_sectors,
148
        BlockCompletionFunc *cb, void *opaque);
B
bellard 已提交
149

K
Kevin Wolf 已提交
150 151
    int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
152 153
    int coroutine_fn (*bdrv_co_preadv)(BlockDriverState *bs,
        uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags);
K
Kevin Wolf 已提交
154 155
    int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
156 157
    int coroutine_fn (*bdrv_co_writev_flags)(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int flags);
158 159
    int coroutine_fn (*bdrv_co_pwritev)(BlockDriverState *bs,
        uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags);
160 161 162

    int supported_write_flags;

163 164 165 166 167 168 169
    /*
     * Efficiently zero a region of the disk image.  Typically an image format
     * would use a compact metadata representation to implement this.  This
     * function pointer may be NULL and .bdrv_co_writev() will be called
     * instead.
     */
    int coroutine_fn (*bdrv_co_write_zeroes)(BlockDriverState *bs,
170
        int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
171 172
    int coroutine_fn (*bdrv_co_discard)(BlockDriverState *bs,
        int64_t sector_num, int nb_sectors);
173
    int64_t coroutine_fn (*bdrv_co_get_block_status)(BlockDriverState *bs,
174 175
        int64_t sector_num, int nb_sectors, int *pnum,
        BlockDriverState **file);
K
Kevin Wolf 已提交
176

177 178 179
    /*
     * Invalidate any cached meta-data.
     */
180
    void (*bdrv_invalidate_cache)(BlockDriverState *bs, Error **errp);
181
    int (*bdrv_inactivate)(BlockDriverState *bs);
182

P
Pavel Dovgalyuk 已提交
183 184 185 186 187 188 189
    /*
     * Flushes all data for all layers by calling bdrv_co_flush for underlying
     * layers, if needed. This function is needed for deterministic
     * synchronization of the flush finishing callback.
     */
    int coroutine_fn (*bdrv_co_flush)(BlockDriverState *bs);

190 191 192 193 194 195
    /*
     * Flushes all data that was already written to the OS all the way down to
     * the disk (for example raw-posix calls fsync()).
     */
    int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs);

K
Kevin Wolf 已提交
196 197 198 199 200 201 202
    /*
     * Flushes all internal caches to the OS. The data may still sit in a
     * writeback cache of the host OS, but it will survive a crash of the qemu
     * process.
     */
    int coroutine_fn (*bdrv_co_flush_to_os)(BlockDriverState *bs);

B
bellard 已提交
203 204
    const char *protocol_name;
    int (*bdrv_truncate)(BlockDriverState *bs, int64_t offset);
205

B
bellard 已提交
206
    int64_t (*bdrv_getlength)(BlockDriverState *bs);
207
    bool has_variable_length;
208
    int64_t (*bdrv_get_allocated_file_size)(BlockDriverState *bs);
209

210
    int (*bdrv_write_compressed)(BlockDriverState *bs, int64_t sector_num,
B
bellard 已提交
211 212
                                 const uint8_t *buf, int nb_sectors);

213
    int (*bdrv_snapshot_create)(BlockDriverState *bs,
B
bellard 已提交
214
                                QEMUSnapshotInfo *sn_info);
215
    int (*bdrv_snapshot_goto)(BlockDriverState *bs,
B
bellard 已提交
216
                              const char *snapshot_id);
217 218 219 220
    int (*bdrv_snapshot_delete)(BlockDriverState *bs,
                                const char *snapshot_id,
                                const char *name,
                                Error **errp);
221
    int (*bdrv_snapshot_list)(BlockDriverState *bs,
B
bellard 已提交
222
                              QEMUSnapshotInfo **psn_info);
E
edison 已提交
223
    int (*bdrv_snapshot_load_tmp)(BlockDriverState *bs,
224 225 226
                                  const char *snapshot_id,
                                  const char *name,
                                  Error **errp);
B
bellard 已提交
227
    int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi);
M
Max Reitz 已提交
228
    ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs);
B
bellard 已提交
229

230 231
    int (*bdrv_save_vmstate)(BlockDriverState *bs, QEMUIOVector *qiov,
                             int64_t pos);
232 233
    int (*bdrv_load_vmstate)(BlockDriverState *bs, uint8_t *buf,
                             int64_t pos, int size);
234

K
Kevin Wolf 已提交
235 236 237
    int (*bdrv_change_backing_file)(BlockDriverState *bs,
        const char *backing_file, const char *backing_fmt);

B
bellard 已提交
238
    /* removable device specific */
239
    bool (*bdrv_is_inserted)(BlockDriverState *bs);
B
bellard 已提交
240
    int (*bdrv_media_changed)(BlockDriverState *bs);
241
    void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag);
242
    void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked);
243

244
    /* to control generic scsi devices */
245
    BlockAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs,
246
        unsigned long int req, void *buf,
247
        BlockCompletionFunc *cb, void *opaque);
248

249
    /* List of options for creating images, terminated by name == NULL */
250
    QemuOptsList *create_opts;
251

252 253 254 255
    /*
     * Returns 0 for completed check, -errno for internal errors.
     * The check results are stored in result.
     */
256 257
    int (*bdrv_check)(BlockDriverState* bs, BdrvCheckResult *result,
        BdrvCheckMode fix);
A
aliguori 已提交
258

259
    int (*bdrv_amend_options)(BlockDriverState *bs, QemuOpts *opts,
260 261
                              BlockDriverAmendStatusCB *status_cb,
                              void *cb_opaque);
M
Max Reitz 已提交
262

263
    void (*bdrv_debug_event)(BlockDriverState *bs, BlkdebugEvent event);
K
Kevin Wolf 已提交
264

K
Kevin Wolf 已提交
265 266 267
    /* TODO Better pass a option string/QDict/QemuOpts to add any rule? */
    int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event,
        const char *tag);
F
Fam Zheng 已提交
268 269
    int (*bdrv_debug_remove_breakpoint)(BlockDriverState *bs,
        const char *tag);
K
Kevin Wolf 已提交
270 271 272
    int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag);
    bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag);

273
    void (*bdrv_refresh_limits)(BlockDriverState *bs, Error **errp);
274

K
Kevin Wolf 已提交
275 276 277 278 279
    /*
     * Returns 1 if newly created images are guaranteed to contain only
     * zeros, 0 otherwise.
     */
    int (*bdrv_has_zero_init)(BlockDriverState *bs);
280

281 282 283 284 285 286 287 288 289 290 291 292 293
    /* Remove fd handlers, timers, and other event loop callbacks so the event
     * loop is no longer in use.  Called with no in-flight requests and in
     * depth-first traversal order with parents before child nodes.
     */
    void (*bdrv_detach_aio_context)(BlockDriverState *bs);

    /* Add fd handlers, timers, and other event loop callbacks so I/O requests
     * can be processed again.  Called with no in-flight requests and in
     * depth-first traversal order with child nodes before parent nodes.
     */
    void (*bdrv_attach_aio_context)(BlockDriverState *bs,
                                    AioContext *new_context);

294 295 296 297
    /* io queue for linux-aio */
    void (*bdrv_io_plug)(BlockDriverState *bs);
    void (*bdrv_io_unplug)(BlockDriverState *bs);

298 299 300 301 302 303 304 305 306 307 308 309 310 311 312
    /**
     * Try to get @bs's logical and physical block size.
     * On success, store them in @bsz and return zero.
     * On failure, return negative errno.
     */
    int (*bdrv_probe_blocksizes)(BlockDriverState *bs, BlockSizes *bsz);
    /**
     * Try to get @bs's geometry (cyls, heads, sectors)
     * On success, store them in @geo and return 0.
     * On failure return -errno.
     * Only drivers that want to override guest geometry implement this
     * callback; see hd_geometry_guess().
     */
    int (*bdrv_probe_geometry)(BlockDriverState *bs, HDGeometry *geo);

313 314 315 316 317 318
    /**
     * Drain and stop any internal sources of requests in the driver, and
     * remain so until next I/O callback (e.g. bdrv_co_writev) is called.
     */
    void (*bdrv_drain)(BlockDriverState *bs);

319
    QLIST_ENTRY(BlockDriver) list;
B
bellard 已提交
320 321
};

322 323 324 325 326 327 328 329 330 331 332 333
/*
 * I/O limits of a block node; embedded in BlockDriverState as 'bl'.
 * Sector-based members count sectors, the *_mem_alignment members are
 * memory alignments.
 */
typedef struct BlockLimits {
    /* maximum number of sectors that can be discarded at once */
    int max_discard;

    /* optimal alignment for discard requests in sectors */
    int64_t discard_alignment;

    /* maximum number of sectors that can be zeroized at once */
    int max_write_zeroes;

    /* optimal alignment for write zeroes requests in sectors */
    int64_t write_zeroes_alignment;

    /* optimal transfer length in sectors */
    int opt_transfer_length;

    /* maximal transfer length in sectors */
    int max_transfer_length;

    /* memory alignment so that no bounce buffer is needed */
    size_t min_mem_alignment;

    /* memory alignment for bounce buffer */
    size_t opt_mem_alignment;

    /* maximum number of iovec elements */
    int max_iov;
} BlockLimits;

351 352
/* Forward declaration; the members of struct BdrvOpBlocker are not defined
 * in the visible part of this header.  Used as the element type of
 * BlockDriverState.op_blockers. */
typedef struct BdrvOpBlocker BdrvOpBlocker;

M
Max Reitz 已提交
353 354 355 356 357 358 359 360 361
/*
 * One subscriber to AioContext changes of a BlockDriverState; the BDS keeps
 * a list of these in its 'aio_notifiers' member.  The two callbacks and
 * 'opaque' have the same shape as the arguments of
 * bdrv_add_aio_context_notifier().
 */
typedef struct BdrvAioNotifier {
    /* same signature as the attached_aio_context argument of
     * bdrv_add_aio_context_notifier() */
    void (*attached_aio_context)(AioContext *new_context, void *opaque);
    /* same signature as the detach_aio_context argument of
     * bdrv_add_aio_context_notifier() */
    void (*detach_aio_context)(void *opaque);

    /* caller-supplied pointer; NOTE(review): presumably passed back as the
     * 'opaque' argument of both callbacks -- confirm */
    void *opaque;

    QLIST_ENTRY(BdrvAioNotifier) list;
} BdrvAioNotifier;

362
struct BdrvChildRole {
363 364
    void (*inherit_options)(int *child_flags, QDict *child_options,
                            int parent_flags, QDict *parent_options);
365 366 367 368 369
};

/* Predefined child roles; only declared here, defined outside this header. */
extern const BdrvChildRole child_file;
extern const BdrvChildRole child_format;

K
Kevin Wolf 已提交
370
struct BdrvChild {
371
    BlockDriverState *bs;
372
    char *name;
373 374
    const BdrvChildRole *role;
    QLIST_ENTRY(BdrvChild) next;
K
Kevin Wolf 已提交
375
    QLIST_ENTRY(BdrvChild) next_parent;
K
Kevin Wolf 已提交
376
};
377

378 379 380 381 382 383
/*
 * Note: the function bdrv_append() copies and swaps contents of
 * BlockDriverStates, so if you add new fields to this struct, please
 * inspect bdrv_append() to determine if the new fields need to be
 * copied as well.
 */
B
bellard 已提交
384
struct BlockDriverState {
385 386
    int64_t total_sectors; /* if we are reading a disk image, give its
                              size in sectors */
B
bellard 已提交
387
    int read_only; /* if true, the media is read only */
388
    int open_flags; /* flags used to open the file, re-used for re-open */
B
bellard 已提交
389
    int encrypted; /* if true, the media is encrypted */
390
    int valid_key; /* if true, a valid encryption key has been set */
391
    int sg;        /* if true, the device is a /dev/sg* */
392 393
    int copy_on_read; /* if true, copy read backing sectors into image
                         note this is a reference count */
394
    bool probed;
B
bellard 已提交
395

B
bellard 已提交
396
    BlockDriver *drv; /* NULL means no media */
B
bellard 已提交
397 398
    void *opaque;

399 400
    BlockBackend *blk;          /* owning backend, if any */

401
    AioContext *aio_context; /* event loop used for fd handlers, timers, etc */
M
Max Reitz 已提交
402 403 404 405
    /* long-running tasks intended to always use the same AioContext as this
     * BDS may register themselves in this list to be notified of changes
     * regarding this BDS's context */
    QLIST_HEAD(, BdrvAioNotifier) aio_notifiers;
406

407 408 409
    char filename[PATH_MAX];
    char backing_file[PATH_MAX]; /* if non zero, the image is a diff of
                                    this file image */
410
    char backing_format[16]; /* if non-zero and backing_file exists */
B
bellard 已提交
411

M
Max Reitz 已提交
412
    QDict *full_open_options;
413
    char exact_filename[PATH_MAX];
M
Max Reitz 已提交
414

415
    BdrvChild *backing;
K
Kevin Wolf 已提交
416
    BdrvChild *file;
417

418 419 420
    /* Callback before write request is processed */
    NotifierWithReturnList before_write_notifiers;

421 422
    /* number of in-flight serialising requests */
    unsigned int serialising_in_flight;
423

424 425
    /* I/O throttling.
     * throttle_state tells us if this BDS has I/O limits configured.
426
     * io_limits_disabled tells us if they are currently being enforced */
427
    CoQueue      throttled_reqs[2];
428 429
    unsigned int io_limits_disabled;

430 431 432 433 434
    /* The following fields are protected by the ThrottleGroup lock.
     * See the ThrottleGroup documentation for details. */
    ThrottleState *throttle_state;
    ThrottleTimers throttle_timers;
    unsigned       pending_reqs[2];
435
    QLIST_ENTRY(BlockDriverState) round_robin;
436

437 438 439
    /* Offset after the highest byte written to */
    uint64_t wr_highest_offset;

440 441 442
    /* I/O Limits */
    BlockLimits bl;

A
Asias He 已提交
443 444 445
    /* Whether produces zeros when read beyond eof */
    bool zero_beyond_eof;

446 447 448
    /* Alignment requirement for offset/length of I/O requests */
    unsigned int request_alignment;

449 450 451 452
    /* the following member gives a name to every node on the bs graph. */
    char node_name[32];
    /* element of the list of named nodes building the graph */
    QTAILQ_ENTRY(BlockDriverState) node_list;
453 454
    /* element of the list of all BlockDriverStates (all_bdrv_states) */
    QTAILQ_ENTRY(BlockDriverState) bs_list;
455 456
    /* element of the list of monitor-owned BDS */
    QTAILQ_ENTRY(BlockDriverState) monitor_list;
F
Fam Zheng 已提交
457
    QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps;
458
    int refcnt;
S
Stefan Hajnoczi 已提交
459 460

    QLIST_HEAD(, BdrvTrackedRequest) tracked_requests;
461

462 463 464
    /* operation blockers */
    QLIST_HEAD(, BdrvOpBlocker) op_blockers[BLOCK_OP_TYPE_MAX];

465 466
    /* long-running background operation */
    BlockJob *job;
467

468 469 470 471
    /* The node that this node inherited default options from (and a reopen on
     * which can affect this node by changing these defaults). This is always a
     * parent node of this node. */
    BlockDriverState *inherits_from;
472
    QLIST_HEAD(, BdrvChild) children;
K
Kevin Wolf 已提交
473
    QLIST_HEAD(, BdrvChild) parents;
474

475
    QDict *options;
K
Kevin Wolf 已提交
476
    QDict *explicit_options;
477
    BlockdevDetectZeroesOptions detect_zeroes;
478 479 480

    /* The error object in use for blocking operations on backing_hd */
    Error *backing_blocker;
481 482 483 484

    /* threshold limit for writes, in bytes. "High water mark". */
    uint64_t write_threshold_offset;
    NotifierWithReturn write_threshold_notifier;
485

486 487 488 489
    /* counters for nested bdrv_io_plug and bdrv_io_unplugged_begin */
    unsigned io_plugged;
    unsigned io_plug_disabled;

490
    int quiesce_counter;
B
bellard 已提交
491 492
};

M
Max Reitz 已提交
493 494 495 496 497 498 499 500 501
/*
 * NOTE(review): the members share names with fields of BlockDriverState
 * (open_flags, read_only, detect_zeroes, throttle_state); presumably this
 * records a BlockBackend's root-node settings -- confirm with the users of
 * this struct.
 */
struct BlockBackendRootState {
    int open_flags;
    bool read_only;
    BlockdevDetectZeroesOptions detect_zeroes;

    char *throttle_group;
    ThrottleState *throttle_state;
};

502 503 504 505 506
/* Return the node that @bs's 'backing' child points to, or NULL when @bs
 * has no 'backing' child.  @bs itself must not be NULL. */
static inline BlockDriverState *backing_bs(BlockDriverState *bs)
{
    if (!bs->backing) {
        return NULL;
    }

    return bs->backing->bs;
}

507 508 509 510 511 512 513

/* Essential block drivers which must always be statically linked into qemu, and
 * which therefore can be accessed without using bdrv_find_format().
 * Only declared here; the driver objects are defined outside this header. */
extern BlockDriver bdrv_file;
extern BlockDriver bdrv_raw;
extern BlockDriver bdrv_qcow2;

514 515 516 517 518 519 520 521
/**
 * bdrv_setup_io_funcs:
 * @bdrv: The driver whose unimplemented I/O interfaces are populated.
 *
 * Prepare a #BlockDriver for I/O request processing by populating
 * unimplemented coroutine and AIO interfaces with generic wrapper functions
 * that fall back to implemented interfaces.
 */
void bdrv_setup_io_funcs(BlockDriver *bdrv);
522

523
int coroutine_fn bdrv_co_preadv(BlockDriverState *bs,
524 525
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
526
int coroutine_fn bdrv_co_pwritev(BlockDriverState *bs,
527 528
    int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
    BdrvRequestFlags flags);
529

530
int get_tmp_filename(char *filename, int size);
531 532
BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
                            const char *filename);
533

534
void bdrv_set_io_limits(BlockDriverState *bs,
535 536
                        ThrottleConfig *cfg);

537

538 539 540 541 542 543 544 545 546
/**
 * bdrv_add_before_write_notifier:
 * @bs: The node whose write requests are to be observed.
 * @notifier: The notifier to register.
 *
 * Register a callback that is invoked before write requests are processed but
 * after any throttling or waiting for overlapping requests.
 */
void bdrv_add_before_write_notifier(BlockDriverState *bs,
                                    NotifierWithReturn *notifier);

547 548 549 550 551
/**
 * bdrv_detach_aio_context:
 *
 * May be called from .bdrv_detach_aio_context() to detach children from the
 * current #AioContext.  This is only needed by block drivers that manage their
 * own children.  Both ->file and ->backing are automatically handled and
 * block drivers should not call this function on them explicitly.
 */
void bdrv_detach_aio_context(BlockDriverState *bs);

/**
 * bdrv_attach_aio_context:
 *
 * May be called from .bdrv_attach_aio_context() to attach children to the new
 * #AioContext.  This is only needed by block drivers that manage their own
 * children.  Both ->file and ->backing are automatically handled and block
 * drivers should not call this function on them explicitly.
 */
void bdrv_attach_aio_context(BlockDriverState *bs,
                             AioContext *new_context);

M
Max Reitz 已提交
568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595
/**
 * bdrv_add_aio_context_notifier:
 * @bs: The BDS whose AioContext association is to be watched.
 * @attached_aio_context: Called after @bs has been attached to a new
 *                        AioContext.
 * @detach_aio_context: Called before @bs is detached from its old
 *                      AioContext.
 * @opaque: Caller-supplied pointer; must be given again, together with the
 *          same callbacks, to bdrv_remove_aio_context_notifier().
 *
 * If a long-running job intends to be always run in the same AioContext as a
 * certain BDS, it may use this function to be notified of changes regarding the
 * association of the BDS to an AioContext.
 *
 * attached_aio_context() is called after the target BDS has been attached to a
 * new AioContext; detach_aio_context() is called before the target BDS is being
 * detached from its old AioContext.
 */
void bdrv_add_aio_context_notifier(BlockDriverState *bs,
        void (*attached_aio_context)(AioContext *new_context, void *opaque),
        void (*detach_aio_context)(void *opaque), void *opaque);

/**
 * bdrv_remove_aio_context_notifier:
 *
 * Unsubscribe from change notifications regarding the BDS's AioContext. The
 * parameters given here have to be the same as those given to
 * bdrv_add_aio_context_notifier().
 */
void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
                                      void (*aio_context_attached)(AioContext *,
                                                                   void *),
                                      void (*aio_context_detached)(void *),
                                      void *opaque);

596 597 598 599
#ifdef _WIN32
/* Declared only when building for Windows (_WIN32).
 * NOTE(review): presumably reports whether @filename names a Windows
 * drive; the return convention is not visible here -- confirm. */
int is_windows_drive(const char *filename);
#endif

P
Paolo Bonzini 已提交
600 601 602 603 604 605 606
/**
 * stream_start:
 * @bs: Block device to operate on.
 * @base: Block device that will become the new base, or %NULL to
 * flatten the whole backing file chain onto @bs.
 * @base_id: The file name that will be written to @bs as the new
 * backing file if the job completes.  Ignored if @base is %NULL.
607
 * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
P
Paolo Bonzini 已提交
608
 * @on_error: The action to take upon error.
P
Paolo Bonzini 已提交
609 610
 * @cb: Completion function for the job.
 * @opaque: Opaque pointer value passed to @cb.
611
 * @errp: Error object.
P
Paolo Bonzini 已提交
612 613 614 615 616 617 618
 *
 * Start a streaming operation on @bs.  Clusters that are unallocated
 * in @bs, but allocated in any image between @base and @bs (both
 * exclusive) will be written to @bs.  At the end of a successful
 * streaming job, the backing file of @bs will be changed to
 * @base_id in the written image and to @base in the live BlockDriverState.
 */
619
void stream_start(BlockDriverState *bs, BlockDriverState *base,
P
Paolo Bonzini 已提交
620
                  const char *base_id, int64_t speed, BlockdevOnError on_error,
621
                  BlockCompletionFunc *cb,
622
                  void *opaque, Error **errp);
623

624 625
/**
 * commit_start:
F
Fam Zheng 已提交
626 627 628
 * @bs: Active block device.
 * @top: Top block device to be committed.
 * @base: Block device that will be written into, and become the new top.
629 630 631 632
 * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
 * @on_error: The action to take upon error.
 * @cb: Completion function for the job.
 * @opaque: Opaque pointer value passed to @cb.
633
 * @backing_file_str: String to use as the backing file in @top's overlay
634 635 636 637 638
 * @errp: Error object.
 *
 */
void commit_start(BlockDriverState *bs, BlockDriverState *base,
                 BlockDriverState *top, int64_t speed,
639
                 BlockdevOnError on_error, BlockCompletionFunc *cb,
640
                 void *opaque, const char *backing_file_str, Error **errp);
F
Fam Zheng 已提交
641 642 643 644 645 646 647 648 649 650 651 652 653 654
/**
 * commit_active_start:
 * @bs: Active block device to be committed.
 * @base: Block device that will be written into, and become the new top.
 * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
 * @on_error: The action to take upon error.
 * @cb: Completion function for the job.
 * @opaque: Opaque pointer value passed to @cb.
 * @errp: Error object.
 *
 */
void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
                         int64_t speed,
                         BlockdevOnError on_error,
655
                         BlockCompletionFunc *cb,
F
Fam Zheng 已提交
656
                         void *opaque, Error **errp);
P
Paolo Bonzini 已提交
657 658 659 660
/*
 * mirror_start:
 * @bs: Block device to operate on.
 * @target: Block device to write to.
661 662
 * @replaces: Block graph node name to replace once the mirror is done. Can
 *            only be used when full mirroring is selected.
P
Paolo Bonzini 已提交
663
 * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
664
 * @granularity: The chosen granularity for the dirty bitmap.
665
 * @buf_size: The amount of data that can be in flight at one time.
P
Paolo Bonzini 已提交
666
 * @mode: Whether to collapse all images in the chain to the target.
667 668
 * @on_source_error: The action to take upon error reading from the source.
 * @on_target_error: The action to take upon error writing to the target.
669
 * @unmap: Whether to unmap target where source sectors only contain zeroes.
P
Paolo Bonzini 已提交
670 671 672 673 674 675 676 677 678 679
 * @cb: Completion function for the job.
 * @opaque: Opaque pointer value passed to @cb.
 * @errp: Error object.
 *
 * Start a mirroring operation on @bs.  Clusters that are allocated
 * in @bs will be written to @bs until the job is cancelled or
 * manually completed.  At the end of a successful mirroring job,
 * @bs will be switched to read from @target.
 */
void mirror_start(BlockDriverState *bs, BlockDriverState *target,
680
                  const char *replaces,
681
                  int64_t speed, uint32_t granularity, int64_t buf_size,
682
                  MirrorSyncMode mode, BlockdevOnError on_source_error,
683
                  BlockdevOnError on_target_error,
684
                  bool unmap,
685
                  BlockCompletionFunc *cb,
P
Paolo Bonzini 已提交
686 687
                  void *opaque, Error **errp);

688 689 690 691 692
/*
 * backup_start:
 * @bs: Block device to operate on.
 * @target: Block device to write to.
 * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
693
 * @sync_mode: What parts of the disk image should be copied to the destination.
694
 * @sync_bitmap: The dirty bitmap if sync_mode is MIRROR_SYNC_MODE_INCREMENTAL.
695 696 697 698
 * @on_source_error: The action to take upon error reading from the source.
 * @on_target_error: The action to take upon error writing to the target.
 * @cb: Completion function for the job.
 * @opaque: Opaque pointer value passed to @cb.
699
 * @txn: Transaction that this job is part of (may be NULL).
700 701 702 703 704
 *
 * Start a backup operation on @bs.  Clusters in @bs are written to @target
 * until the job is cancelled or manually completed.
 */
void backup_start(BlockDriverState *bs, BlockDriverState *target,
705
                  int64_t speed, MirrorSyncMode sync_mode,
706
                  BdrvDirtyBitmap *sync_bitmap,
707
                  BlockdevOnError on_source_error,
708
                  BlockdevOnError on_target_error,
709
                  BlockCompletionFunc *cb, void *opaque,
710
                  BlockJobTxn *txn, Error **errp);
711

712 713
/* NOTE(review): presumably the monitor (HMP) helper that adds a block node
 * described by the option string @optstr -- confirm against the
 * implementation. */
void hmp_drive_add_node(Monitor *mon, const char *optstr);

714 715 716 717 718
/* Create/release a BdrvChild link for @child_bs.
 * NOTE(review): "root" presumably means the link has no parent
 * BlockDriverState; ownership and reference-count effects are not visible
 * in this header -- confirm against the implementation. */
BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
                                  const char *child_name,
                                  const BdrvChildRole *child_role);
void bdrv_root_unref_child(BdrvChild *child);

719 720 721
/* NOTE(review): presumably bracket a section during which the I/O limits of
 * @bs are not enforced (cf. BlockDriverState.io_limits_disabled); calls are
 * expected to be paired -- confirm against the implementation. */
void bdrv_no_throttling_begin(BlockDriverState *bs);
void bdrv_no_throttling_end(BlockDriverState *bs);

722 723
/* BlockBackend device-model hooks.
 * NOTE(review): presumably forwarded to the guest device attached to @blk;
 * the exact semantics are not visible in this header -- confirm. */
void blk_dev_change_media_cb(BlockBackend *blk, bool load);
bool blk_dev_has_removable_media(BlockBackend *blk);
bool blk_dev_has_tray(BlockBackend *blk);
void blk_dev_eject_request(BlockBackend *blk, bool force);
bool blk_dev_is_tray_open(BlockBackend *blk);
bool blk_dev_is_medium_locked(BlockBackend *blk);
void blk_dev_resize_cb(BlockBackend *blk);

730
/* NOTE(review): presumably marks @nr_sectors sectors starting at
 * @cur_sector as dirty in the dirty bitmaps of @bs -- confirm. */
void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
/* NOTE(review): presumably reports whether @bs still has requests
 * outstanding -- confirm. */
bool bdrv_requests_pending(BlockDriverState *bs);
732

F
Fam Zheng 已提交
733 734 735
/* NOTE(review): presumably a clear/undo pair: clearing hands the previous
 * contents back through @out, and the undo call takes them again as @in --
 * confirm against the implementation, including who frees the HBitmap. */
void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out);
void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in);

736 737
/* NOTE(review): presumably closes the monitor-owned BlockDriverStates (cf.
 * BlockDriverState.monitor_list) -- confirm against the implementation. */
void blockdev_close_all_bdrv_states(void);

B
bellard 已提交
738
#endif /* BLOCK_INT_H */