rbd.c 20.3 KB
Newer Older
1 2 3
/*
 * QEMU Block driver for RADOS (Ceph)
 *
 * Copyright (C) 2010-2011 Christian Brunner <chb@muc.de>,
 *                         Josh Durgin <josh.durgin@dreamhost.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

12 13
#include <inttypes.h>

14 15 16 17 18
#include "qemu-common.h"
#include "qemu-error.h"

#include "block_int.h"

19
#include <rbd/librbd.h>
20 21 22 23 24 25



/*
 * When specifying the image filename use:
 *
 * rbd:poolname/devicename[@snapshotname][:option1=value1[:option2=value2...]]
 *
 * poolname must be the name of an existing rados pool.
 *
 * devicename is the basename for all objects used to
 * emulate the raw device.
 *
 * Each option given is used to configure rados, and may be
 * any Ceph option, or "conf". The "conf" option specifies
 * a Ceph configuration file to read.
 *
 * Metadata information (image size, ...) is stored in an
 * object with the name "devicename.rbd".
 *
 * The raw device is split into 4MB sized objects by default.
 * The sequence number is encoded in a 12 byte long hex-string,
 * and is attached to the devicename, separated by a dot.
 * e.g. "devicename.1234567890ab"
 *
 */

/*
 * Object size in bytes corresponding to the default object order.
 * NOTE(review): OBJ_DEFAULT_OBJ_ORDER is not defined in the visible part of
 * this file, and this macro is not used there either - confirm it is needed.
 */
#define OBJ_MAX_SIZE (1UL << OBJ_DEFAULT_OBJ_ORDER)

49 50 51 52 53 54 55
#define RBD_MAX_CONF_NAME_SIZE 128
#define RBD_MAX_CONF_VAL_SIZE 512
#define RBD_MAX_CONF_SIZE 1024
#define RBD_MAX_POOL_NAME_SIZE 128
#define RBD_MAX_SNAP_NAME_SIZE 128
#define RBD_MAX_SNAPS 100

56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
/*
 * Driver-private AIO control block, allocated from rbd_aio_pool for every
 * read or write request.
 */
typedef struct RBDAIOCB {
    BlockDriverAIOCB common;    /* must stay first: cast to/from RBDAIOCB */
    QEMUBH *bh;                 /* bottom half that runs rbd_aio_bh_cb() */
    int ret;                    /* result reported to the completion callback */
    QEMUIOVector *qiov;         /* caller's scatter/gather list */
    char *bounce;               /* linear buffer from qemu_blockalign() */
    int write;                  /* non-zero for writes, zero for reads */
    int64_t sector_num;         /* NOTE(review): never assigned in visible code */
    int error;                  /* set to 1 once a failure has been recorded */
    struct BDRVRBDState *s;     /* driver state of the owning device */
    int cancelled;              /* set to 1 by qemu_rbd_aio_cancel() */
} RBDAIOCB;

/*
 * Per-request context passed to librbd as the completion argument.
 * It is allocated in rbd_aio_rw_vector() and its address is sent through the
 * notification pipe when the request finishes.
 */
typedef struct RADOSCB {
    int rcbid;                  /* not referenced in the visible code */
    RBDAIOCB *acb;              /* request this completion belongs to */
    struct BDRVRBDState *s;     /* driver state owning the notification pipe */
    int done;                   /* initialised to 0 at submission */
    int64_t size;               /* requested transfer length in bytes */
    char *buf;                  /* data buffer (the acb's bounce buffer) */
    int ret;                    /* value of rbd_aio_get_return_value() */
} RADOSCB;

/* Indices into BDRVRBDState.fds: read end and write end of the pipe. */
#define RBD_FD_READ 0
#define RBD_FD_WRITE 1

typedef struct BDRVRBDState {
    int fds[2];
84 85 86 87
    rados_t cluster;
    rados_ioctx_t io_ctx;
    rbd_image_t image;
    char name[RBD_MAX_IMAGE_NAME_SIZE];
88
    int qemu_aio_count;
89
    char *snap;
90 91 92 93 94 95
    int event_reader_pos;
    RADOSCB *event_rcb;
} BDRVRBDState;

static void rbd_aio_bh_cb(void *opaque);

96 97 98 99
/*
 * Copy the leading token of @src into @dst.
 *
 * If @delim is not '\0' and occurs in @src, @src is cut at the first
 * occurrence (the delimiter is overwritten with NUL, so @src is modified)
 * and *@p is set to the character following it. Otherwise the whole of
 * @src is the token and *@p is NULL.
 *
 * @dst/@dst_len: destination buffer and its size including the NUL
 * @name:         description of the token, used in error messages only
 *
 * Returns 0 on success, -EINVAL if the token is empty or does not fit into
 * @dst. *@p is updated before the length checks, so it is valid even when
 * an error is returned.
 */
static int qemu_rbd_next_tok(char *dst, int dst_len,
                             char *src, char delim,
                             const char *name,
                             char **p)
{
    int l;
    char *end;

    *p = NULL;

    if (delim != '\0') {
        end = strchr(src, delim);
        if (end) {
            *p = end + 1;
            *end = '\0';
        }
    }
    l = strlen(src);
    if (l >= dst_len) {
        error_report("%s too long", name);
        return -EINVAL;
    } else if (l == 0) {
        error_report("%s too short", name);
        return -EINVAL;
    }

    pstrcpy(dst, dst_len, src);

    return 0;
}

127 128 129
/*
 * Split an "rbd:pool/image[@snap][:conf]" filename into its components.
 *
 * @pool, @name are always filled on success; @snap and @conf are set to the
 * empty string when the corresponding part is absent. Each *_len is the
 * size of the matching buffer including the terminating NUL.
 *
 * Returns 0 on success and a negative errno value (-EINVAL) when the
 * "rbd:" prefix or the '/' separator is missing, or when a component is
 * empty or too long for its buffer.
 */
static int qemu_rbd_parsename(const char *filename,
                              char *pool, int pool_len,
                              char *snap, int snap_len,
                              char *name, int name_len,
                              char *conf, int conf_len)
{
    const char *start;
    char *p, *buf;
    int ret;

    if (!strstart(filename, "rbd:", &start)) {
        return -EINVAL;
    }

    /* qemu_rbd_next_tok() writes NULs into its input, so work on a copy */
    buf = g_strdup(start);
    p = buf;
    *snap = '\0';
    *conf = '\0';

    ret = qemu_rbd_next_tok(pool, pool_len, p, '/', "pool name", &p);
    if (ret < 0 || !p) {
        /* a pool name without a following "/image" is not a valid name */
        ret = -EINVAL;
        goto done;
    }

    if (strchr(p, '@')) {
        ret = qemu_rbd_next_tok(name, name_len, p, '@', "object name", &p);
        if (ret < 0) {
            goto done;
        }
        ret = qemu_rbd_next_tok(snap, snap_len, p, ':', "snap name", &p);
    } else {
        ret = qemu_rbd_next_tok(name, name_len, p, ':', "object name", &p);
    }
    if (ret < 0 || !p) {
        /* error, or nothing follows: no configuration string was given */
        goto done;
    }

    /* everything after the first ':' is kept verbatim as configuration */
    ret = qemu_rbd_next_tok(conf, conf_len, p, '\0', "configuration", &p);

done:
    g_free(buf);
    return ret;
}

172 173 174 175 176 177 178
/*
 * Apply a "name=value[:name=value...]" configuration string to @cluster.
 *
 * The special option "conf" names a configuration file that is loaded with
 * rados_conf_read_file(); every other option is passed to rados_conf_set().
 * Processing stops at the first error.
 *
 * Returns a non-negative value on success and a negative value on failure
 * (-EINVAL for malformed or rejected options, or the error returned by
 * rados_conf_read_file()).
 */
static int qemu_rbd_set_conf(rados_t cluster, const char *conf)
{
    char *p, *buf;
    char name[RBD_MAX_CONF_NAME_SIZE];
    char value[RBD_MAX_CONF_VAL_SIZE];
    int ret = 0;

    /* the tokenizer modifies its input, so operate on a private copy */
    buf = g_strdup(conf);
    p = buf;

    while (p) {
        ret = qemu_rbd_next_tok(name, sizeof(name), p,
                                '=', "conf option name", &p);
        if (ret < 0) {
            break;
        }

        if (!p) {
            error_report("conf option %s has no value", name);
            ret = -EINVAL;
            break;
        }

        ret = qemu_rbd_next_tok(value, sizeof(value), p,
                                ':', "conf option value", &p);
        if (ret < 0) {
            break;
        }

        if (strcmp(name, "conf")) {
            ret = rados_conf_set(cluster, name, value);
            if (ret < 0) {
                error_report("invalid conf option %s", name);
                ret = -EINVAL;
                break;
            }
        } else {
            ret = rados_conf_read_file(cluster, value);
            if (ret < 0) {
                error_report("error reading conf file %s", value);
                break;
            }
        }
    }

    g_free(buf);
    return ret;
}

221
/*
 * Create a new rbd image.
 *
 * @filename: "rbd:pool/image[@snap][:conf]" specification; the snapshot
 *            part is parsed but not used for creation
 * @options:  NULL-name-terminated option list; BLOCK_OPT_SIZE gives the
 *            image size in bytes and BLOCK_OPT_CLUSTER_SIZE the object size
 *            (a power of two, at least 4096). An object size of 0 leaves
 *            the order at 0, which is passed to rbd_create() unchanged.
 *
 * Returns the result of rbd_create() on success paths, -EINVAL for an
 * invalid filename or object size, and -EIO when the cluster cannot be
 * set up or the pool cannot be opened.
 */
static int qemu_rbd_create(const char *filename, QEMUOptionParameter *options)
{
    int64_t bytes = 0;
    int64_t objsize;
    int obj_order = 0;
    char pool[RBD_MAX_POOL_NAME_SIZE];
    char name[RBD_MAX_IMAGE_NAME_SIZE];
    char snap_buf[RBD_MAX_SNAP_NAME_SIZE];
    char conf[RBD_MAX_CONF_SIZE];
    rados_t cluster;
    rados_ioctx_t io_ctx;
    int ret;

    if (qemu_rbd_parsename(filename, pool, sizeof(pool),
                           snap_buf, sizeof(snap_buf),
                           name, sizeof(name),
                           conf, sizeof(conf)) < 0) {
        return -EINVAL;
    }

    /* Read out options */
    while (options && options->name) {
        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
            bytes = options->value.n;
        } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) {
            if (options->value.n) {
                objsize = options->value.n;
                if ((objsize - 1) & objsize) {    /* not a power of 2? */
                    error_report("obj size needs to be power of 2");
                    return -EINVAL;
                }
                if (objsize < 4096) {
                    error_report("obj size too small");
                    return -EINVAL;
                }
                /*
                 * objsize is a 64-bit power of two here. Compute log2 by
                 * shifting instead of ffs(), which takes an int and would
                 * truncate sizes that do not fit into 32 bits.
                 */
                obj_order = 0;
                while ((objsize >> obj_order) > 1) {
                    obj_order++;
                }
            }
        }
        options++;
    }

    if (rados_create(&cluster, NULL) < 0) {
        error_report("error initializing");
        return -EIO;
    }

    /* fall back to the default config file unless one is named explicitly */
    if (strstr(conf, "conf=") == NULL) {
        if (rados_conf_read_file(cluster, NULL) < 0) {
            error_report("error reading config file");
            rados_shutdown(cluster);
            return -EIO;
        }
    }

    if (conf[0] != '\0' &&
        qemu_rbd_set_conf(cluster, conf) < 0) {
        error_report("error setting config options");
        rados_shutdown(cluster);
        return -EIO;
    }

    if (rados_connect(cluster) < 0) {
        error_report("error connecting");
        rados_shutdown(cluster);
        return -EIO;
    }

    if (rados_ioctx_create(cluster, pool, &io_ctx) < 0) {
        error_report("error opening pool %s", pool);
        rados_shutdown(cluster);
        return -EIO;
    }

    ret = rbd_create(io_ctx, name, bytes, &obj_order);
    rados_ioctx_destroy(io_ctx);
    rados_shutdown(cluster);

    return ret;
}

/*
302 303
 * This aio completion is being called from qemu_rbd_aio_event_reader()
 * and runs in qemu context. It schedules a bh, but just in case the aio
304 305
 * was not cancelled before.
 */
306
static void qemu_rbd_complete_aio(RADOSCB *rcb)
307 308 309 310 311
{
    RBDAIOCB *acb = rcb->acb;
    int64_t r;

    if (acb->cancelled) {
312 313
        qemu_vfree(acb->bounce);
        qemu_aio_release(acb);
314 315 316 317 318 319 320 321 322 323
        goto done;
    }

    r = rcb->ret;

    if (acb->write) {
        if (r < 0) {
            acb->ret = r;
            acb->error = 1;
        } else if (!acb->error) {
324
            acb->ret = rcb->size;
325 326
        }
    } else {
327 328
        if (r < 0) {
            memset(rcb->buf, 0, rcb->size);
329 330
            acb->ret = r;
            acb->error = 1;
331 332
        } else if (r < rcb->size) {
            memset(rcb->buf + r, 0, rcb->size - r);
333
            if (!acb->error) {
334
                acb->ret = rcb->size;
335 336
            }
        } else if (!acb->error) {
337
            acb->ret = r;
338 339 340
        }
    }
    /* Note that acb->bh can be NULL in case where the aio was cancelled */
341 342
    acb->bh = qemu_bh_new(rbd_aio_bh_cb, acb);
    qemu_bh_schedule(acb->bh);
343
done:
344
    g_free(rcb);
345 346 347 348 349 350
}

/*
 * aio fd read handler. It runs in the qemu context and calls the
 * completion handling of completed rados aio operations.
 */
351
static void qemu_rbd_aio_event_reader(void *opaque)
352 353 354 355 356 357 358 359 360 361 362 363 364 365 366
{
    BDRVRBDState *s = opaque;

    ssize_t ret;

    do {
        char *p = (char *)&s->event_rcb;

        /* now read the rcb pointer that was sent from a non qemu thread */
        if ((ret = read(s->fds[RBD_FD_READ], p + s->event_reader_pos,
                        sizeof(s->event_rcb) - s->event_reader_pos)) > 0) {
            if (ret > 0) {
                s->event_reader_pos += ret;
                if (s->event_reader_pos == sizeof(s->event_rcb)) {
                    s->event_reader_pos = 0;
367 368
                    qemu_rbd_complete_aio(s->event_rcb);
                    s->qemu_aio_count--;
369 370 371 372 373 374
                }
            }
        }
    } while (ret < 0 && errno == EINTR);
}

375
static int qemu_rbd_aio_flush_cb(void *opaque)
376 377 378 379 380 381
{
    BDRVRBDState *s = opaque;

    return (s->qemu_aio_count > 0);
}

382
/*
 * Open an rbd image described by @filename
 * ("rbd:pool/image[@snap][:conf]") and set up the notification pipe used
 * to deliver completions to the qemu thread.
 *
 * The device is marked read-only when a snapshot was requested.
 * @flags is not used.
 *
 * Returns 0 on success or a negative error value. On failure everything
 * acquired so far is released again, including the duplicated snapshot
 * name in s->snap.
 */
static int qemu_rbd_open(BlockDriverState *bs, const char *filename, int flags)
{
    BDRVRBDState *s = bs->opaque;
    char pool[RBD_MAX_POOL_NAME_SIZE];
    char snap_buf[RBD_MAX_SNAP_NAME_SIZE];
    char conf[RBD_MAX_CONF_SIZE];
    int r;

    if (qemu_rbd_parsename(filename, pool, sizeof(pool),
                           snap_buf, sizeof(snap_buf),
                           s->name, sizeof(s->name),
                           conf, sizeof(conf)) < 0) {
        return -EINVAL;
    }
    s->snap = NULL;
    if (snap_buf[0] != '\0') {
        s->snap = g_strdup(snap_buf);
    }

    r = rados_create(&s->cluster, NULL);
    if (r < 0) {
        error_report("error initializing");
        goto failed_free_snap;
    }

    /* fall back to the default config file unless one is named explicitly */
    if (strstr(conf, "conf=") == NULL) {
        r = rados_conf_read_file(s->cluster, NULL);
        if (r < 0) {
            error_report("error reading config file");
            goto failed_shutdown;
        }
    }

    if (conf[0] != '\0') {
        r = qemu_rbd_set_conf(s->cluster, conf);
        if (r < 0) {
            error_report("error setting config options");
            goto failed_shutdown;
        }
    }

    r = rados_connect(s->cluster);
    if (r < 0) {
        error_report("error connecting");
        goto failed_shutdown;
    }

    r = rados_ioctx_create(s->cluster, pool, &s->io_ctx);
    if (r < 0) {
        error_report("error opening pool %s", pool);
        goto failed_shutdown;
    }

    r = rbd_open(s->io_ctx, s->name, &s->image, s->snap);
    if (r < 0) {
        error_report("error reading header from %s", s->name);
        goto failed_open;
    }

    bs->read_only = (s->snap != NULL);

    s->event_reader_pos = 0;
    r = qemu_pipe(s->fds);
    if (r < 0) {
        error_report("error opening eventfd");
        goto failed;
    }
    fcntl(s->fds[0], F_SETFL, O_NONBLOCK);
    fcntl(s->fds[1], F_SETFL, O_NONBLOCK);
    qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], qemu_rbd_aio_event_reader,
                            NULL, qemu_rbd_aio_flush_cb, NULL, s);

    return 0;

    /* unwind in reverse order of acquisition */
failed:
    rbd_close(s->image);
failed_open:
    rados_ioctx_destroy(s->io_ctx);
failed_shutdown:
    rados_shutdown(s->cluster);
failed_free_snap:
    g_free(s->snap);
    s->snap = NULL;
    return r;
}

470
/*
 * Close the device: tear down the notification pipe and release the
 * image, the pool I/O context, the snapshot name and the cluster handle.
 */
static void qemu_rbd_close(BlockDriverState *bs)
{
    BDRVRBDState *s = bs->opaque;

    /*
     * Unregister the read handler while the descriptor is still open, so
     * the aio layer is never asked about an fd number that has already
     * been closed (and could have been reused).
     */
    qemu_aio_set_fd_handler(s->fds[RBD_FD_READ], NULL, NULL, NULL, NULL,
                            NULL);
    close(s->fds[RBD_FD_READ]);
    close(s->fds[RBD_FD_WRITE]);

    rbd_close(s->image);
    rados_ioctx_destroy(s->io_ctx);
    g_free(s->snap);
    rados_shutdown(s->cluster);
}

/*
 * Cancel aio. Since we don't reference acb in a non qemu threads,
 * it is safe to access it here.
 */
489
static void qemu_rbd_aio_cancel(BlockDriverAIOCB *blockacb)
490 491 492 493 494 495 496
{
    RBDAIOCB *acb = (RBDAIOCB *) blockacb;
    acb->cancelled = 1;
}

/* Pool from which the RBDAIOCBs of this driver are allocated. */
static AIOPool rbd_aio_pool = {
    .aiocb_size = sizeof(RBDAIOCB),
    .cancel = qemu_rbd_aio_cancel,
};

500
/*
 * Send the address of @rcb through the notification pipe of @s.
 *
 * The completion itself has to be handled in qemu thread context, so only
 * the pointer is handed over here. The write is retried on EINTR; on
 * EAGAIN the function waits with select() until the pipe is writable and
 * tries again.
 *
 * Returns the non-negative result of write() on success, or its negative
 * result (with errno set) on any other error.
 */
static int qemu_rbd_send_pipe(BDRVRBDState *s, RADOSCB *rcb)
{
    int ret = 0;
    while (1) {
        fd_set wfd;
        int fd = s->fds[RBD_FD_WRITE];

        /* the pointer value itself is the message */
        ret = write(fd, (void *)&rcb, sizeof(rcb));
        if (ret >= 0) {
            break;
        }
        if (errno == EINTR) {
            continue;
        }
        if (errno != EAGAIN) {
            break;
        }

        /* pipe is full: block until there is room, then retry the write */
        FD_ZERO(&wfd);
        FD_SET(fd, &wfd);
        do {
            ret = select(fd + 1, NULL, &wfd, NULL, NULL);
        } while (ret < 0 && errno == EINTR);
    }

    return ret;
}

/*
 * This is the callback function for rbd_aio_read and _write
 *
 * Note: this function is being called from a non qemu thread so
 * we need to be careful about what we do here. Generally we only
 * write to the block notification pipe, and do the rest of the
 * io completion handling from qemu_rbd_aio_event_reader() which
 * runs in a qemu context.
 *
 * The librbd completion is released here after its return value has been
 * saved in rcb->ret. If the pipe write fails, the rcb is freed because
 * nobody else will ever see it.
 */
static void rbd_finish_aiocb(rbd_completion_t c, RADOSCB *rcb)
{
    int ret;
    rcb->ret = rbd_aio_get_return_value(c);
    rbd_aio_release(c);
    ret = qemu_rbd_send_pipe(rcb->s, rcb);
    if (ret < 0) {
        error_report("failed writing to acb->s->fds");
        g_free(rcb);
    }
}

551
/* Callback when all queued rbd_aio requests are complete */
552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576

static void rbd_aio_bh_cb(void *opaque)
{
    RBDAIOCB *acb = opaque;

    if (!acb->write) {
        qemu_iovec_from_buffer(acb->qiov, acb->bounce, acb->qiov->size);
    }
    qemu_vfree(acb->bounce);
    acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
    qemu_bh_delete(acb->bh);
    acb->bh = NULL;

    qemu_aio_release(acb);
}

/*
 * Submit an asynchronous read (@write == 0) or write (@write != 0) of
 * @nb_sectors sectors starting at @sector_num.
 *
 * The data is staged in a linear bounce buffer; for writes it is filled
 * from @qiov before submission, for reads it is copied back to @qiov in
 * rbd_aio_bh_cb(). @cb is invoked with @opaque when the request finishes.
 *
 * Returns the AIOCB of the request, or NULL if it could not be submitted.
 * On failure the bounce buffer, the RADOSCB, the librbd completion (if it
 * was created) and the acb are all released.
 */
static BlockDriverAIOCB *rbd_aio_rw_vector(BlockDriverState *bs,
                                           int64_t sector_num,
                                           QEMUIOVector *qiov,
                                           int nb_sectors,
                                           BlockDriverCompletionFunc *cb,
                                           void *opaque, int write)
{
    RBDAIOCB *acb;
    RADOSCB *rcb;
    rbd_completion_t c;
    int64_t off, size;
    char *buf;
    int r;

    BDRVRBDState *s = bs->opaque;

    acb = qemu_aio_get(&rbd_aio_pool, bs, cb, opaque);
    if (!acb) {
        return NULL;
    }
    acb->write = write;
    acb->qiov = qiov;
    acb->bounce = qemu_blockalign(bs, qiov->size);
    acb->ret = 0;
    acb->error = 0;
    acb->s = s;
    acb->cancelled = 0;
    acb->bh = NULL;

    if (write) {
        qemu_iovec_to_buffer(acb->qiov, acb->bounce);
    }

    buf = acb->bounce;

    off = sector_num * BDRV_SECTOR_SIZE;
    size = nb_sectors * BDRV_SECTOR_SIZE;

    s->qemu_aio_count++; /* All the RADOSCB */

    rcb = g_malloc(sizeof(RADOSCB));
    rcb->done = 0;
    rcb->acb = acb;
    rcb->buf = buf;
    rcb->s = acb->s;
    rcb->size = size;
    r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c);
    if (r < 0) {
        goto failed;
    }

    if (write) {
        r = rbd_aio_write(s->image, off, size, buf, c);
    } else {
        r = rbd_aio_read(s->image, off, size, buf, c);
    }

    if (r < 0) {
        goto failed_completion;
    }

    return &acb->common;

failed_completion:
    /* the request was never queued, so the completion is still ours */
    rbd_aio_release(c);
failed:
    g_free(rcb);
    qemu_vfree(acb->bounce);
    s->qemu_aio_count--;
    qemu_aio_release(acb);
    return NULL;
}

638 639 640 641 642 643
/* Asynchronous vectored read: rbd_aio_rw_vector() with write == 0. */
static BlockDriverAIOCB *qemu_rbd_aio_readv(BlockDriverState *bs,
                                            int64_t sector_num,
                                            QEMUIOVector *qiov,
                                            int nb_sectors,
                                            BlockDriverCompletionFunc *cb,
                                            void *opaque)
{
    return rbd_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
}

648 649 650 651 652 653
/* Asynchronous vectored write: rbd_aio_rw_vector() with write == 1. */
static BlockDriverAIOCB *qemu_rbd_aio_writev(BlockDriverState *bs,
                                             int64_t sector_num,
                                             QEMUIOVector *qiov,
                                             int nb_sectors,
                                             BlockDriverCompletionFunc *cb,
                                             void *opaque)
{
    return rbd_aio_rw_vector(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
}

658
/*
 * Fill @bdi with information about the image: the cluster size is set to
 * the object size reported by rbd_stat().
 *
 * Returns 0 on success or the negative error from rbd_stat().
 */
static int qemu_rbd_getinfo(BlockDriverState *bs, BlockDriverInfo *bdi)
{
    BDRVRBDState *s = bs->opaque;
    rbd_image_info_t info;
    int r;

    r = rbd_stat(s->image, &info, sizeof(info));
    if (r < 0) {
        return r;
    }

    bdi->cluster_size = info.obj_size;
    return 0;
}

673
/*
 * Return the image size as reported by rbd_stat(), or the negative error
 * from rbd_stat() on failure.
 */
static int64_t qemu_rbd_getlength(BlockDriverState *bs)
{
    BDRVRBDState *s = bs->opaque;
    rbd_image_info_t info;
    int r;

    r = rbd_stat(s->image, &info, sizeof(info));
    if (r < 0) {
        return r;
    }

    return info.size;
}

687 688 689 690 691 692 693 694 695 696 697 698 699
/*
 * Resize the image to @offset via rbd_resize().
 *
 * Returns 0 on success; a negative result from rbd_resize() is passed
 * through unchanged.
 */
static int qemu_rbd_truncate(BlockDriverState *bs, int64_t offset)
{
    BDRVRBDState *state = bs->opaque;
    int rc = rbd_resize(state->image, offset);

    /* non-negative results are all reported as plain success */
    return rc < 0 ? rc : 0;
}

700 701
/*
 * Create an rbd snapshot named sn_info->name.
 *
 * Returns 0 on success, -EINVAL if the name is empty or an id string is
 * given that differs from the name, -ERANGE if the name would not fit into
 * the id string field, or the negative error from rbd_snap_create().
 */
static int qemu_rbd_snap_create(BlockDriverState *bs,
                                QEMUSnapshotInfo *sn_info)
{
    BDRVRBDState *s = bs->opaque;
    int r;

    if (sn_info->name[0] == '\0') {
        return -EINVAL; /* we need a name for rbd snapshots */
    }

    /*
     * rbd snapshots are using the name as the user controlled unique identifier
     * we can't use the rbd snapid for that purpose, as it can't be set
     */
    if (sn_info->id_str[0] != '\0' &&
        strcmp(sn_info->id_str, sn_info->name) != 0) {
        return -EINVAL;
    }

    /* the name doubles as the id string, so it must fit into that field */
    if (strlen(sn_info->name) >= sizeof(sn_info->id_str)) {
        return -ERANGE;
    }

    r = rbd_snap_create(s->image, sn_info->name);
    if (r < 0) {
        error_report("failed to create snap: %s", strerror(-r));
        return r;
    }

    return 0;
}

732 733
/*
 * List the snapshots of the image.
 *
 * On success *@psn_tab points to a g_malloc0()ed array with one entry per
 * snapshot (name and id string both set to the rbd snapshot name) and the
 * number of entries is returned; the caller owns the array. When there are
 * no snapshots or an error occurs, *@psn_tab is set to NULL and 0 or the
 * negative error from rbd_snap_list() is returned.
 */
static int qemu_rbd_snap_list(BlockDriverState *bs,
                              QEMUSnapshotInfo **psn_tab)
{
    BDRVRBDState *s = bs->opaque;
    QEMUSnapshotInfo *sn_info, *sn_tab = NULL;
    int i, snap_count;
    rbd_snap_info_t *snaps;
    int max_snaps = RBD_MAX_SNAPS;

    /*
     * rbd_snap_list() reports -ERANGE when the array is too small and
     * stores the required size in max_snaps, so retry with a larger array.
     */
    do {
        snaps = g_malloc(sizeof(*snaps) * max_snaps);
        snap_count = rbd_snap_list(s->image, snaps, &max_snaps);
        if (snap_count <= 0) {
            /* nothing was returned, so the array is not needed further */
            g_free(snaps);
        }
    } while (snap_count == -ERANGE);

    if (snap_count <= 0) {
        goto done;
    }

    sn_tab = g_malloc0(snap_count * sizeof(QEMUSnapshotInfo));

    for (i = 0; i < snap_count; i++) {
        const char *snap_name = snaps[i].name;

        sn_info = sn_tab + i;
        pstrcpy(sn_info->id_str, sizeof(sn_info->id_str), snap_name);
        pstrcpy(sn_info->name, sizeof(sn_info->name), snap_name);

        sn_info->vm_state_size = snaps[i].size;
        sn_info->date_sec = 0;
        sn_info->date_nsec = 0;
        sn_info->vm_clock_nsec = 0;
    }
    /* release what librbd allocated per entry, then the array itself */
    rbd_snap_list_end(snaps);
    g_free(snaps);

done:
    *psn_tab = sn_tab;
    return snap_count;
}

773
static QEMUOptionParameter qemu_rbd_create_options[] = {
774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789
    {
     .name = BLOCK_OPT_SIZE,
     .type = OPT_SIZE,
     .help = "Virtual disk size"
    },
    {
     .name = BLOCK_OPT_CLUSTER_SIZE,
     .type = OPT_SIZE,
     .help = "RBD object size"
    },
    {NULL}
};

static BlockDriver bdrv_rbd = {
    .format_name        = "rbd",
    .instance_size      = sizeof(BDRVRBDState),
790 791 792 793 794 795
    .bdrv_file_open     = qemu_rbd_open,
    .bdrv_close         = qemu_rbd_close,
    .bdrv_create        = qemu_rbd_create,
    .bdrv_get_info      = qemu_rbd_getinfo,
    .create_options     = qemu_rbd_create_options,
    .bdrv_getlength     = qemu_rbd_getlength,
796
    .bdrv_truncate      = qemu_rbd_truncate,
797 798
    .protocol_name      = "rbd",

799 800
    .bdrv_aio_readv     = qemu_rbd_aio_readv,
    .bdrv_aio_writev    = qemu_rbd_aio_writev,
801

802 803
    .bdrv_snapshot_create = qemu_rbd_snap_create,
    .bdrv_snapshot_list = qemu_rbd_snap_list,
804 805 806 807 808 809 810 811
};

/* Register the rbd driver description with the block layer. */
static void bdrv_rbd_init(void)
{
    bdrv_register(&bdrv_rbd);
}

block_init(bdrv_rbd_init);