/*
 * storage_backend_rbd.c: storage backend for RBD (RADOS Block Device) handling
 *
 * Copyright (C) 2013-2014 Red Hat, Inc.
 * Copyright (C) 2012 Wido den Hollander
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library.  If not, see
 * <http://www.gnu.org/licenses/>.
 *
 * Author: Wido den Hollander <wido@widodh.nl>
 */

#include <config.h>

26
#include "datatypes.h"
27
#include "virerror.h"
28 29
#include "storage_backend_rbd.h"
#include "storage_conf.h"
30
#include "viralloc.h"
31
#include "virlog.h"
32
#include "base64.h"
33
#include "viruuid.h"
34
#include "virstring.h"
35 36 37 38 39
#include "rados/librados.h"
#include "rbd/librbd.h"

#define VIR_FROM_THIS VIR_FROM_STORAGE

40 41
VIR_LOG_INIT("storage.storage_backend_rbd");

42 43 44 45 46 47 48
/*
 * Connection state shared by the helpers in this file. Each public backend
 * callback keeps one instance on its stack for the duration of the call.
 */
struct _virStorageBackendRBDState {
    rados_t cluster;        /* handle filled in by rados_create() */
    rados_ioctx_t ioctx;    /* I/O context filled in by rados_ioctx_create() */
    time_t starttime;       /* time(0) recorded right before rados_connect() */
};

typedef struct _virStorageBackendRBDState virStorageBackendRBDState;
E
Eric Blake 已提交
49
/* Pointer alias used in the signatures of the connection helpers below. */
typedef virStorageBackendRBDState *virStorageBackendRBDStatePtr;
50

E
Eric Blake 已提交
51
static int virStorageBackendRBDOpenRADOSConn(virStorageBackendRBDStatePtr ptr,
52
                                             virConnectPtr conn,
53
                                             virStoragePoolSourcePtr source)
54 55
{
    int ret = -1;
56
    int r = 0;
57
    virStorageAuthDefPtr authdef = source->auth;
58 59 60 61 62 63
    unsigned char *secret_value = NULL;
    size_t secret_value_size;
    char *rados_key = NULL;
    virBuffer mon_host = VIR_BUFFER_INITIALIZER;
    virSecretPtr secret = NULL;
    char secretUuid[VIR_UUID_STRING_BUFLEN];
64
    size_t i;
65
    char *mon_buff = NULL;
66 67 68
    const char *client_mount_timeout = "30";
    const char *mon_op_timeout = "30";
    const char *osd_op_timeout = "30";
69

70 71 72
    if (authdef) {
        VIR_DEBUG("Using cephx authorization, username: %s", authdef->username);
        r = rados_create(&ptr->cluster, authdef->username);
73 74
        if (r < 0) {
            virReportSystemError(-r, "%s", _("failed to initialize RADOS"));
75 76 77
            goto cleanup;
        }

78 79 80 81 82 83 84
        if (!conn) {
            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                           _("'ceph' authentication not supported "
                             "for autostarted pools"));
            return -1;
        }

85 86
        if (authdef->secretType == VIR_STORAGE_SECRET_TYPE_UUID) {
            virUUIDFormat(authdef->secret.uuid, secretUuid);
87 88
            VIR_DEBUG("Looking up secret by UUID: %s", secretUuid);
            secret = virSecretLookupByUUIDString(conn, secretUuid);
89
        } else if (authdef->secret.usage != NULL) {
90
            VIR_DEBUG("Looking up secret by usage: %s",
91
                      authdef->secret.usage);
92
            secret = virSecretLookupByUsage(conn, VIR_SECRET_USAGE_TYPE_CEPH,
93
                                            authdef->secret.usage);
94 95 96
        }

        if (secret == NULL) {
97
            if (authdef->secretType == VIR_STORAGE_SECRET_TYPE_UUID) {
98 99
                virReportError(VIR_ERR_NO_SECRET,
                               _("no secret matches uuid '%s'"),
100
                                 secretUuid);
101 102 103
            } else {
                virReportError(VIR_ERR_NO_SECRET,
                               _("no secret matches usage value '%s'"),
104
                                 authdef->secret.usage);
105
            }
106 107 108
            goto cleanup;
        }

109 110
        secret_value = conn->secretDriver->secretGetValue(secret,
                                                          &secret_value_size, 0,
111 112 113
                                                          VIR_SECRET_GET_VALUE_INTERNAL_CALL);

        if (!secret_value) {
114
            if (authdef->secretType == VIR_STORAGE_SECRET_TYPE_UUID) {
115 116 117
                virReportError(VIR_ERR_INTERNAL_ERROR,
                               _("could not get the value of the secret "
                                 "for username '%s' using uuid '%s'"),
118
                               authdef->username, secretUuid);
119 120 121 122
            } else {
                virReportError(VIR_ERR_INTERNAL_ERROR,
                               _("could not get the value of the secret "
                                 "for username '%s' using usage value '%s'"),
123
                               authdef->username, authdef->secret.usage);
124
            }
125 126 127
            goto cleanup;
        }

128 129 130 131 132
        base64_encode_alloc((char *)secret_value,
                            secret_value_size, &rados_key);
        memset(secret_value, 0, secret_value_size);

        if (rados_key == NULL) {
133
            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
134
                           _("failed to decode the RADOS key"));
135 136 137 138 139
            goto cleanup;
        }

        VIR_DEBUG("Found cephx key: %s", rados_key);
        if (rados_conf_set(ptr->cluster, "key", rados_key) < 0) {
140 141 142
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("failed to set RADOS option: %s"),
                           "rados_key");
143 144 145 146 147 148
            goto cleanup;
        }

        memset(rados_key, 0, strlen(rados_key));

        if (rados_conf_set(ptr->cluster, "auth_supported", "cephx") < 0) {
149 150 151
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("failed to set RADOS option: %s"),
                           "auth_supported");
152 153 154 155 156
            goto cleanup;
        }
    } else {
        VIR_DEBUG("Not using cephx authorization");
        if (rados_create(&ptr->cluster, NULL) < 0) {
157
            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
158
                           _("failed to create the RADOS cluster"));
159 160 161
            goto cleanup;
        }
        if (rados_conf_set(ptr->cluster, "auth_supported", "none") < 0) {
162 163 164
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("failed to set RADOS option: %s"),
                           "auth_supported");
165 166 167 168 169
            goto cleanup;
        }
    }

    VIR_DEBUG("Found %zu RADOS cluster monitors in the pool configuration",
170
              source->nhost);
171

172 173 174
    for (i = 0; i < source->nhost; i++) {
        if (source->hosts[i].name != NULL &&
            !source->hosts[i].port) {
175
            virBufferAsprintf(&mon_host, "%s:6789,",
176 177 178
                              source->hosts[i].name);
        } else if (source->hosts[i].name != NULL &&
            source->hosts[i].port) {
179
            virBufferAsprintf(&mon_host, "%s:%d,",
180 181
                              source->hosts[i].name,
                              source->hosts[i].port);
182
        } else {
183 184
            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                           _("received malformed monitor, check the XML definition"));
185 186 187
        }
    }

188 189
    if (virBufferCheckError(&mon_host) < 0)
        goto cleanup;
190 191 192 193

    mon_buff = virBufferContentAndReset(&mon_host);
    VIR_DEBUG("RADOS mon_host has been set to: %s", mon_buff);
    if (rados_conf_set(ptr->cluster, "mon_host", mon_buff) < 0) {
J
Ján Tomko 已提交
194 195 196
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("failed to set RADOS option: %s"),
                       "mon_host");
197 198 199
        goto cleanup;
    }

200 201 202 203 204 205 206 207 208 209 210 211 212 213
    /*
     * Set timeout options for librados.
     * In case the Ceph cluster is down libvirt won't block forever.
     * Operations in librados will return -ETIMEDOUT when the timeout is reached.
     */
    VIR_DEBUG("Setting RADOS option client_mount_timeout to %s", client_mount_timeout);
    rados_conf_set(ptr->cluster, "client_mount_timeout", client_mount_timeout);

    VIR_DEBUG("Setting RADOS option rados_mon_op_timeout to %s", mon_op_timeout);
    rados_conf_set(ptr->cluster, "rados_mon_op_timeout", mon_op_timeout);

    VIR_DEBUG("Setting RADOS option rados_osd_op_timeout to %s", osd_op_timeout);
    rados_conf_set(ptr->cluster, "rados_osd_op_timeout", osd_op_timeout);

214
    ptr->starttime = time(0);
215 216 217 218
    r = rados_connect(ptr->cluster);
    if (r < 0) {
        virReportSystemError(-r, _("failed to connect to the RADOS monitor on: %s"),
                             mon_buff);
219 220 221 222 223
        goto cleanup;
    }

    ret = 0;

224
 cleanup:
225 226
    VIR_FREE(secret_value);
    VIR_FREE(rados_key);
227 228 229 230

    if (secret != NULL)
        virSecretFree(secret);

231 232 233 234 235
    virBufferFreeAndReset(&mon_host);
    VIR_FREE(mon_buff);
    return ret;
}

236 237 238 239 240 241 242 243 244 245
/*
 * Opens an I/O context on the RADOS pool named by the libvirt pool's source
 * and stores it in ptr->ioctx.
 *
 * Returns the result of rados_ioctx_create(); a negative value has already
 * been turned into a libvirt error.
 */
static int virStorageBackendRBDOpenIoCTX(virStorageBackendRBDStatePtr ptr, virStoragePoolObjPtr pool)
{
    int rc;

    rc = rados_ioctx_create(ptr->cluster, pool->def->source.name, &ptr->ioctx);
    if (rc >= 0)
        return rc;

    virReportSystemError(-rc, _("failed to create the RBD IoCTX. Does the pool '%s' exist?"),
                         pool->def->source.name);
    return rc;
}

246 247 248 249
/*
 * virStorageBackendRBDCloseRADOSConn:
 * @ptr: backend state to tear down
 *
 * Destroys the I/O context and shuts down the cluster handle if they are
 * set, then resets both to NULL so a repeated call is harmless.
 *
 * Returns 0 when nothing was open, -1 when only the I/O context was
 * destroyed, and -2 whenever the cluster handle was shut down.
 */
static int virStorageBackendRBDCloseRADOSConn(virStorageBackendRBDStatePtr ptr)
{
    int ret = 0;
    time_t runtime;

    if (ptr->ioctx != NULL) {
        VIR_DEBUG("Closing RADOS IoCTX");
        rados_ioctx_destroy(ptr->ioctx);
        ret = -1;
    }
    ptr->ioctx = NULL;

    if (ptr->cluster != NULL) {
        VIR_DEBUG("Closing RADOS connection");
        rados_shutdown(ptr->cluster);
        ret = -2;
    }
    ptr->cluster = NULL;

    runtime = time(0) - ptr->starttime;
    /* time_t is not guaranteed to be 'long'; convert explicitly so the
     * argument matches the %ld conversion. */
    VIR_DEBUG("RADOS connection existed for %ld seconds", (long)runtime);

    return ret;
}

/*
 * Fills in a volume definition from the RBD image that carries its name.
 *
 * The image is opened through ptr->ioctx and stat'ed; capacity, allocation
 * (object size times object count), type, target path and key of @vol are
 * then updated. Path and key are both "<source pool name>/<volume name>".
 *
 * Returns 0 on success, -1 on failure.
 */
static int volStorageBackendRBDRefreshVolInfo(virStorageVolDefPtr vol,
                                              virStoragePoolObjPtr pool,
                                              virStorageBackendRBDStatePtr ptr)
{
    rbd_image_t img;
    rbd_image_info_t imginfo;
    int result = -1;
    int rc;

    rc = rbd_open(ptr->ioctx, vol->name, &img, NULL);
    if (rc < 0) {
        /* Nothing was opened, so there is nothing to close. */
        virReportSystemError(-rc, _("failed to open the RBD image '%s'"),
                             vol->name);
        return -1;
    }

    rc = rbd_stat(img, &imginfo, sizeof(imginfo));
    if (rc < 0) {
        virReportSystemError(-rc, _("failed to stat the RBD image '%s'"),
                             vol->name);
        goto done;
    }

    VIR_DEBUG("Refreshed RBD image %s/%s (size: %llu obj_size: %llu num_objs: %llu)",
              pool->def->source.name, vol->name,
              (unsigned long long)imginfo.size,
              (unsigned long long)imginfo.obj_size,
              (unsigned long long)imginfo.num_objs);

    vol->type = VIR_STORAGE_VOL_NETWORK;
    vol->target.capacity = imginfo.size;
    vol->target.allocation = imginfo.obj_size * imginfo.num_objs;

    VIR_FREE(vol->target.path);
    if (virAsprintf(&vol->target.path, "%s/%s",
                    pool->def->source.name, vol->name) < 0)
        goto done;

    VIR_FREE(vol->key);
    if (virAsprintf(&vol->key, "%s/%s",
                    pool->def->source.name, vol->name) < 0)
        goto done;

    result = 0;

 done:
    rbd_close(img);
    return result;
}

321
static int virStorageBackendRBDRefreshPool(virConnectPtr conn,
322 323 324 325 326
                                           virStoragePoolObjPtr pool)
{
    size_t max_size = 1024;
    int ret = -1;
    int len = -1;
327
    int r = 0;
328
    char *name, *names = NULL;
E
Eric Blake 已提交
329
    virStorageBackendRBDState ptr;
330 331 332
    ptr.cluster = NULL;
    ptr.ioctx = NULL;

333
    if (virStorageBackendRBDOpenRADOSConn(&ptr, conn, &pool->def->source) < 0)
334 335
        goto cleanup;

336
    if (virStorageBackendRBDOpenIoCTX(&ptr, pool) < 0)
337 338
        goto cleanup;

339
    struct rados_cluster_stat_t clusterstat;
340 341 342
    r = rados_cluster_stat(ptr.cluster, &clusterstat);
    if (r < 0) {
        virReportSystemError(-r, "%s", _("failed to stat the RADOS cluster"));
343 344 345 346
        goto cleanup;
    }

    struct rados_pool_stat_t poolstat;
347 348 349 350
    r = rados_ioctx_pool_stat(ptr.ioctx, &poolstat);
    if (r < 0) {
        virReportSystemError(-r, _("failed to stat the RADOS pool '%s'"),
                             pool->def->source.name);
351 352 353
        goto cleanup;
    }

354 355
    pool->def->capacity = clusterstat.kb * 1024;
    pool->def->available = clusterstat.kb_avail * 1024;
356 357 358
    pool->def->allocation = poolstat.num_bytes;

    VIR_DEBUG("Utilization of RBD pool %s: (kb: %llu kb_avail: %llu num_bytes: %llu)",
359 360
              pool->def->source.name, (unsigned long long)clusterstat.kb,
              (unsigned long long)clusterstat.kb_avail,
361 362 363 364
              (unsigned long long)poolstat.num_bytes);

    while (true) {
        if (VIR_ALLOC_N(names, max_size) < 0)
365
            goto cleanup;
366 367 368 369 370

        len = rbd_list(ptr.ioctx, names, &max_size);
        if (len >= 0)
            break;
        if (len != -ERANGE) {
J
Ján Tomko 已提交
371
            VIR_WARN("%s", _("A problem occurred while listing RBD images"));
372 373
            goto cleanup;
        }
374
        VIR_FREE(names);
375 376
    }

377
    for (name = names; name < names + max_size;) {
378 379 380 381 382
        virStorageVolDefPtr vol;

        if (STREQ(name, ""))
            break;

383
        if (VIR_ALLOC(vol) < 0)
384
            goto cleanup;
385

386
        if (VIR_STRDUP(vol->name, name) < 0) {
387
            VIR_FREE(vol);
388
            goto cleanup;
389
        }
390 391 392

        name += strlen(name) + 1;

E
Eric Blake 已提交
393
        if (volStorageBackendRBDRefreshVolInfo(vol, pool, &ptr) < 0) {
394
            virStorageVolDefFree(vol);
395
            goto cleanup;
396
        }
397

398
        if (VIR_APPEND_ELEMENT(pool->volumes.objs, pool->volumes.count, vol) < 0) {
399
            virStorageVolDefFree(vol);
400 401 402
            virStoragePoolObjClearVols(pool);
            goto cleanup;
        }
403 404
    }

405
    VIR_DEBUG("Found %zu images in RBD pool %s",
406 407 408 409
              pool->volumes.count, pool->def->source.name);

    ret = 0;

410
 cleanup:
411
    VIR_FREE(names);
E
Eric Blake 已提交
412
    virStorageBackendRBDCloseRADOSConn(&ptr);
413 414 415 416 417 418 419 420 421
    return ret;
}

/*
 * Removes the RBD image backing @vol from the pool's RADOS pool.
 *
 * VIR_STORAGE_VOL_DELETE_ZEROED is accepted but only triggers a warning;
 * the image is removed the same way regardless of @flags.
 *
 * Returns 0 on success, -1 on failure.
 */
static int virStorageBackendRBDDeleteVol(virConnectPtr conn,
                                         virStoragePoolObjPtr pool,
                                         virStorageVolDefPtr vol,
                                         unsigned int flags)
{
    virStorageBackendRBDState ptr;
    int rc = 0;
    int ret = -1;

    ptr.ioctx = NULL;
    ptr.cluster = NULL;

    VIR_DEBUG("Removing RBD image %s/%s", pool->def->source.name, vol->name);

    if (flags & VIR_STORAGE_VOL_DELETE_ZEROED)
        VIR_WARN("%s", _("This storage backend does not supported zeroed removal of volumes"));

    /* Both steps must succeed before the image can be touched. */
    if (virStorageBackendRBDOpenRADOSConn(&ptr, conn, &pool->def->source) < 0 ||
        virStorageBackendRBDOpenIoCTX(&ptr, pool) < 0)
        goto cleanup;

    rc = rbd_remove(ptr.ioctx, vol->name);
    if (rc < 0) {
        virReportSystemError(-rc, _("failed to remove volume '%s/%s'"),
                             pool->def->source.name, vol->name);
        goto cleanup;
    }

    ret = 0;

 cleanup:
    virStorageBackendRBDCloseRADOSConn(&ptr);
    return ret;
}

452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474

/*
 * Prepares a volume definition for an RBD image without contacting the
 * cluster: marks it as a network volume and sets both the target path and
 * the key to "<source pool name>/<volume name>".
 *
 * Returns 0 on success, -1 if either string could not be built.
 */
static int
virStorageBackendRBDCreateVol(virConnectPtr conn ATTRIBUTE_UNUSED,
                              virStoragePoolObjPtr pool,
                              virStorageVolDefPtr vol)
{
    vol->type = VIR_STORAGE_VOL_NETWORK;

    /* Drop any previous path before building the new one. */
    VIR_FREE(vol->target.path);
    if (virAsprintf(&vol->target.path, "%s/%s",
                    pool->def->source.name, vol->name) < 0)
        return -1;

    /* The key uses the same "<pool>/<image>" form as the path. */
    VIR_FREE(vol->key);
    if (virAsprintf(&vol->key, "%s/%s",
                    pool->def->source.name, vol->name) < 0)
        return -1;

    return 0;
}

475 476 477 478 479 480 481 482 483 484
/*
 * virStorageBackendRBDCreateImage:
 * @io: I/O context of the RADOS pool the image is created in
 * @name: name of the new image
 * @capacity: size handed to librbd as the image size
 *
 * Creates an RBD image, using rbd_create3() when LIBRBD_VERSION_CODE is
 * above 260 and plain rbd_create() otherwise.
 *
 * Returns the librbd result unchanged. The caller passes the negated value
 * to virReportSystemError(), so the real error code has to be preserved
 * instead of being flattened to -1.
 *
 * NOTE(review): @capacity is 'long' while the caller passes an
 * unsigned long long; sizes beyond LONG_MAX would be truncated where long
 * is 32 bits wide. Confirm whether the parameter should be widened.
 */
static int virStorageBackendRBDCreateImage(rados_ioctx_t io,
                                           char *name, long capacity)
{
    int order = 0;
#if LIBRBD_VERSION_CODE > 260
    /* NOTE(review): 3 presumably selects the layering and striping-v2
     * feature bits; confirm against the librbd headers. */
    uint64_t features = 3;
    uint64_t stripe_count = 1;
    uint64_t stripe_unit = 4194304; /* 4 * 1024 * 1024 */

    return rbd_create3(io, name, capacity, features, &order,
                       stripe_unit, stripe_count);
#else
    return rbd_create(io, name, capacity, &order);
#endif
}
494 495 496 497 498 499

/*
 * Creates the RBD image for @vol in the pool and refreshes the volume
 * definition from the freshly created image.
 *
 * No flags are supported. Volumes that request encryption are rejected
 * after the connection has been opened.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
virStorageBackendRBDBuildVol(virConnectPtr conn,
                             virStoragePoolObjPtr pool,
                             virStorageVolDefPtr vol,
                             unsigned int flags)
{
    virStorageBackendRBDState ptr;
    int rc = 0;
    int ret = -1;

    ptr.ioctx = NULL;
    ptr.cluster = NULL;

    VIR_DEBUG("Creating RBD image %s/%s with size %llu",
              pool->def->source.name,
              vol->name, vol->target.capacity);

    virCheckFlags(0, -1);

    if (virStorageBackendRBDOpenRADOSConn(&ptr, conn, &pool->def->source) < 0 ||
        virStorageBackendRBDOpenIoCTX(&ptr, pool) < 0)
        goto cleanup;

    if (vol->target.encryption != NULL) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("storage pool does not support encrypted volumes"));
        goto cleanup;
    }

    rc = virStorageBackendRBDCreateImage(ptr.ioctx, vol->name,
                                         vol->target.capacity);
    if (rc < 0) {
        virReportSystemError(-rc, _("failed to create volume '%s/%s'"),
                             pool->def->source.name,
                             vol->name);
        goto cleanup;
    }

    /* Pick up the real size and allocation of the new image. */
    if (volStorageBackendRBDRefreshVolInfo(vol, pool, &ptr) < 0)
        goto cleanup;

    ret = 0;

 cleanup:
    virStorageBackendRBDCloseRADOSConn(&ptr);
    return ret;
}

/*
 * virStorageBackendRBDRefreshVol:
 * @conn: connection handed to the RADOS connection helper
 * @pool: pool the volume belongs to; supplies the source and pool name
 * @vol: volume definition to refresh
 *
 * Opens a connection to the cluster and re-reads the size, allocation,
 * path and key of a single volume from its RBD image.
 *
 * Returns 0 on success, -1 on failure.
 */
static int virStorageBackendRBDRefreshVol(virConnectPtr conn,
                                          virStoragePoolObjPtr pool,
                                          virStorageVolDefPtr vol)
{
    /* Fully initialized so the close helper never reads indeterminate
     * fields when opening the connection fails early. */
    virStorageBackendRBDState ptr = {
        .cluster = NULL,
        .ioctx = NULL,
        .starttime = 0,
    };
    int ret = -1;

    if (virStorageBackendRBDOpenRADOSConn(&ptr, conn, &pool->def->source) < 0)
        goto cleanup;

    if (virStorageBackendRBDOpenIoCTX(&ptr, pool) < 0)
        goto cleanup;

    if (volStorageBackendRBDRefreshVolInfo(vol, pool, &ptr) < 0)
        goto cleanup;

    ret = 0;

 cleanup:
    virStorageBackendRBDCloseRADOSConn(&ptr);
    return ret;
}

/*
 * virStorageBackendRBDResizeVol:
 * @conn: connection handed to the RADOS connection helper
 * @pool: pool the volume belongs to; supplies the source and pool name
 * @vol: volume whose RBD image is resized
 * @capacity: new size passed to rbd_resize()
 * @flags: must be 0, no flags are supported
 *
 * Opens the image named after @vol and resizes it to @capacity.
 *
 * Returns 0 on success, -1 on failure.
 */
static int virStorageBackendRBDResizeVol(virConnectPtr conn,
                                         virStoragePoolObjPtr pool,
                                         virStorageVolDefPtr vol,
                                         unsigned long long capacity,
                                         unsigned int flags)
{
    /* Fully initialized so the close helper never reads indeterminate
     * fields when opening the connection fails early. */
    virStorageBackendRBDState ptr = {
        .cluster = NULL,
        .ioctx = NULL,
        .starttime = 0,
    };
    rbd_image_t image = NULL;
    int ret = -1;
    int r = 0;

    virCheckFlags(0, -1);

    if (virStorageBackendRBDOpenRADOSConn(&ptr, conn, &pool->def->source) < 0)
        goto cleanup;

    if (virStorageBackendRBDOpenIoCTX(&ptr, pool) < 0)
        goto cleanup;

    r = rbd_open(ptr.ioctx, vol->name, &image, NULL);
    if (r < 0) {
        virReportSystemError(-r, _("failed to open the RBD image '%s'"),
                             vol->name);
        goto cleanup;
    }

    r = rbd_resize(image, capacity);
    if (r < 0) {
        virReportSystemError(-r, _("failed to resize the RBD image '%s'"),
                             vol->name);
        goto cleanup;
    }

    ret = 0;

 cleanup:
    /* image stays NULL unless rbd_open() set it. */
    if (image != NULL)
        rbd_close(image);
    virStorageBackendRBDCloseRADOSConn(&ptr);
    return ret;
}

virStorageBackend virStorageBackendRBD = {
    .type = VIR_STORAGE_POOL_RBD,

    .refreshPool = virStorageBackendRBDRefreshPool,
    .createVol = virStorageBackendRBDCreateVol,
618
    .buildVol = virStorageBackendRBDBuildVol,
619 620 621 622
    .refreshVol = virStorageBackendRBDRefreshVol,
    .deleteVol = virStorageBackendRBDDeleteVol,
    .resizeVol = virStorageBackendRBDResizeVol,
};