/*
 * lock_driver_sanlock.c: A lock driver for Sanlock
 *
 * Copyright (C) 2010-2014 Red Hat, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library.  If not, see
 * <http://www.gnu.org/licenses/>.
 *
 */

#include <config.h>

#include <stdlib.h>
#include <stdint.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>
#include <errno.h>
#include <sys/types.h>
31 32
#include <sys/stat.h>
#include <fcntl.h>
33 34 35

#include <sanlock.h>
#include <sanlock_resource.h>
36
#include <sanlock_admin.h>
37

38
#include "dirname.h"
39
#include "lock_driver.h"
40
#include "virlog.h"
41
#include "virerror.h"
42
#include "viralloc.h"
43
#include "vircrypto.h"
E
Eric Blake 已提交
44
#include "virfile.h"
45
#include "virconf.h"
46
#include "virstring.h"
47 48

#include "configmake.h"
49 50 51

#define VIR_FROM_THIS VIR_FROM_LOCKING

52 53
VIR_LOG_INIT("locking.lock_driver_sanlock");

54
#define VIR_LOCK_MANAGER_SANLOCK_AUTO_DISK_LOCKSPACE "__LIBVIRT__DISKS__"
55
#define VIR_LOCK_MANAGER_SANLOCK_KILLPATH LIBEXECDIR "/libvirt_sanlock_helper"
56

57 58 59 60 61
/*
 * temporary fix for the case where the sanlock devel package is
 * too old to provide that define, and probably the functionality too
 */
#ifndef SANLK_RES_SHARED
M
Michal Privoznik 已提交
62
# define SANLK_RES_SHARED    0x4
63 64
#endif

65 66 67
/* Driver-wide configuration type and its pointer alias */
typedef struct _virLockManagerSanlockDriver virLockManagerSanlockDriver;
typedef virLockManagerSanlockDriver *virLockManagerSanlockDriverPtr;

68 69 70
/* Per-lock-manager-object private data type and its pointer alias */
typedef struct _virLockManagerSanlockPrivate virLockManagerSanlockPrivate;
typedef virLockManagerSanlockPrivate *virLockManagerSanlockPrivatePtr;

71 72
/* Driver-wide settings; defaults are set at init time and may be
 * overridden from the configuration file */
struct _virLockManagerSanlockDriver {
    /* refuse to acquire when R/W exclusive disks exist but no lease was given */
    bool requireLeaseForDisks;
    /* "host_id" setting; used as the host id when adding the lockspace */
    int hostID;
    /* "auto_disk_leases" setting: create a lease per disk automatically */
    bool autoDiskLease;
    /* directory holding the lockspace file and the automatic leases (owned) */
    char *autoDiskLeasePath;

    /* under which permissions does sanlock run; -1 means "not configured" */
    uid_t user;
    gid_t group;
};

/* Single driver instance: allocated by virLockManagerSanlockInit() and
 * released by virLockManagerSanlockDeinit(); NULL while uninitialized */
static virLockManagerSanlockDriver *driver = NULL;

84
struct _virLockManagerSanlockPrivate {
85
    const char *vm_uri;
86
    char *vm_name;
87
    unsigned char vm_uuid[VIR_UUID_BUFLEN];
88 89 90 91 92 93 94 95 96 97 98
    unsigned int vm_id;
    unsigned int vm_pid;
    unsigned int flags;
    bool hasRWDisks;
    int res_count;
    struct sanlk_resource *res_args[SANLK_MAX_RESOURCES];
};

/*
 * sanlock plugin for the libvirt virLockManager API
 */
99 100 101 102
static int virLockManagerSanlockLoadConfig(const char *configFile)
{
    virConfPtr conf;
    virConfValuePtr p;
103
    char *tmp;
104 105 106 107 108 109 110 111 112 113 114 115 116 117 118

    if (access(configFile, R_OK) == -1) {
        if (errno != ENOENT) {
            virReportSystemError(errno,
                                 _("Unable to access config file %s"),
                                 configFile);
            return -1;
        }
        return 0;
    }

    if (!(conf = virConfReadFile(configFile, 0)))
        return -1;

#define CHECK_TYPE(name,typ) if (p && p->type != (typ)) {               \
119 120 121
        virReportError(VIR_ERR_INTERNAL_ERROR,                          \
                       "%s: %s: expected type " #typ,                   \
                       configFile, (name));                             \
122 123 124 125
        virConfFree(conf);                                              \
        return -1;                                                      \
    }

126 127 128 129 130 131 132 133
    p = virConfGetValue(conf, "auto_disk_leases");
    CHECK_TYPE("auto_disk_leases", VIR_CONF_LONG);
    if (p) driver->autoDiskLease = p->l;

    p = virConfGetValue(conf, "disk_lease_dir");
    CHECK_TYPE("disk_lease_dir", VIR_CONF_STRING);
    if (p && p->str) {
        VIR_FREE(driver->autoDiskLeasePath);
134
        if (VIR_STRDUP(driver->autoDiskLeasePath, p->str) < 0) {
135 136 137 138 139 140 141 142 143
            virConfFree(conf);
            return -1;
        }
    }

    p = virConfGetValue(conf, "host_id");
    CHECK_TYPE("host_id", VIR_CONF_LONG);
    if (p) driver->hostID = p->l;

144 145 146 147
    p = virConfGetValue(conf, "require_lease_for_disks");
    CHECK_TYPE("require_lease_for_disks", VIR_CONF_LONG);
    if (p)
        driver->requireLeaseForDisks = p->l;
148 149
    else
        driver->requireLeaseForDisks = !driver->autoDiskLease;
150

151 152 153
    p = virConfGetValue(conf, "user");
    CHECK_TYPE("user", VIR_CONF_STRING);
    if (p) {
154
        if (VIR_STRDUP(tmp, p->str) < 0) {
155 156 157 158 159 160 161 162 163 164 165 166
            virConfFree(conf);
            return -1;
        }

        if (virGetUserID(tmp, &driver->user) < 0) {
            VIR_FREE(tmp);
            virConfFree(conf);
            return -1;
        }
        VIR_FREE(tmp);
    }

167 168
    p = virConfGetValue(conf, "group");
    CHECK_TYPE("group", VIR_CONF_STRING);
169
    if (p) {
170
        if (VIR_STRDUP(tmp, p->str) < 0) {
171 172 173 174 175 176 177 178 179 180 181
            virConfFree(conf);
            return -1;
        }
        if (virGetGroupID(tmp, &driver->group) < 0) {
            VIR_FREE(tmp);
            virConfFree(conf);
            return -1;
        }
        VIR_FREE(tmp);
    }

182 183 184 185
    virConfFree(conf);
    return 0;
}

186 187 188 189 190
/* How many milliseconds to sleep before retrying to add a lockspace */
#define LOCKSPACE_SLEEP 100
/* How many times to try adding a lockspace */
#define LOCKSPACE_RETRIES 10

191 192 193 194 195 196 197
static int virLockManagerSanlockSetupLockspace(void)
{
    int fd = -1;
    struct stat st;
    int rv;
    struct sanlk_lockspace ls;
    char *path = NULL;
198
    char *dir = NULL;
199
    int retries = LOCKSPACE_RETRIES;
200 201 202

    if (virAsprintf(&path, "%s/%s",
                    driver->autoDiskLeasePath,
203
                    VIR_LOCK_MANAGER_SANLOCK_AUTO_DISK_LOCKSPACE) < 0)
204 205
        goto error;

206 207 208 209 210 211 212 213
    if (!virStrcpyStatic(ls.name,
                         VIR_LOCK_MANAGER_SANLOCK_AUTO_DISK_LOCKSPACE)) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Lockspace path '%s' exceeded %d characters"),
                       VIR_LOCK_MANAGER_SANLOCK_AUTO_DISK_LOCKSPACE,
                       SANLK_PATH_LEN);
        goto error;
    }
214 215
    ls.host_id = 0; /* Doesn't matter for initialization */
    ls.flags = 0;
216
    if (!virStrcpy(ls.host_id_disk.path, path, SANLK_PATH_LEN)) {
217 218 219
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Lockspace path '%s' exceeded %d characters"),
                       path, SANLK_PATH_LEN);
220 221 222 223 224 225 226 227
        goto error;
    }
    ls.host_id_disk.offset = 0;

    /* Stage 1: Ensure the lockspace file exists on disk, has
     * space allocated for it and is initialized with lease
     */
    if (stat(path, &st) < 0) {
228
        int perms = 0600;
229
        VIR_DEBUG("Lockspace %s does not yet exist", path);
230 231 232 233 234 235 236 237 238 239 240 241 242

        if (!(dir = mdir_name(path))) {
            virReportOOMError();
            goto error;
        }
        if (stat(dir, &st) < 0 || !S_ISDIR(st.st_mode)) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Unable to create lockspace %s: parent directory"
                             " does not exist or is not a directory"),
                           path);
            goto error;
        }

243
        if (driver->group != (gid_t) -1)
244 245 246
            perms |= 0060;

        if ((fd = open(path, O_WRONLY|O_CREAT|O_EXCL, perms)) < 0) {
247 248 249 250 251 252 253 254
            if (errno != EEXIST) {
                virReportSystemError(errno,
                                     _("Unable to create lockspace %s"),
                                     path);
                goto error;
            }
            VIR_DEBUG("Someone else just created lockspace %s", path);
        } else {
255
            /* chown() the path to make sure sanlock can access it */
256
            if ((driver->user != (uid_t) -1 || driver->group != (gid_t) -1) &&
257 258 259 260 261 262 263 264 265
                (fchown(fd, driver->user, driver->group) < 0)) {
                virReportSystemError(errno,
                                     _("cannot chown '%s' to (%u, %u)"),
                                     path,
                                     (unsigned int) driver->user,
                                     (unsigned int) driver->group);
                goto error_unlink;
            }

266
            if ((rv = sanlock_align(&ls.host_id_disk)) < 0) {
267
                if (rv <= -200)
268 269 270
                    virReportError(VIR_ERR_INTERNAL_ERROR,
                                   _("Unable to query sector size %s: error %d"),
                                   path, rv);
271 272 273 274 275 276 277 278 279 280
                else
                    virReportSystemError(-rv,
                                         _("Unable to query sector size %s"),
                                         path);
                goto error_unlink;
            }

            /*
             * Pre allocate enough data for 1 block of leases at preferred alignment
             */
E
Eric Blake 已提交
281
            if (safezero(fd, 0, rv) < 0) {
282 283 284 285 286 287 288 289 290 291 292 293
                virReportSystemError(errno,
                                     _("Unable to allocate lockspace %s"),
                                     path);
                goto error_unlink;
            }

            if (VIR_CLOSE(fd) < 0) {
                virReportSystemError(errno,
                                     _("Unable to save lockspace %s"),
                                     path);
                goto error_unlink;
            }
294

295
            if ((rv = sanlock_init(&ls, NULL, 0, 0)) < 0) {
296
                if (rv <= -200)
297 298 299
                    virReportError(VIR_ERR_INTERNAL_ERROR,
                                   _("Unable to initialize lockspace %s: error %d"),
                                   path, rv);
300 301 302 303 304 305 306 307
                else
                    virReportSystemError(-rv,
                                         _("Unable to initialize lockspace %s"),
                                         path);
                goto error_unlink;
            }
            VIR_DEBUG("Lockspace %s has been initialized", path);
        }
308 309
    } else if (S_ISREG(st.st_mode)) {
        /* okay, the lease file exists. Check the permissions */
310 311
        if (((driver->user != (uid_t) -1 && driver->user != st.st_uid) ||
             (driver->group != (gid_t) -1 && driver->group != st.st_gid)) &&
312 313 314 315 316 317 318 319 320
            (chown(path, driver->user, driver->group) < 0)) {
            virReportSystemError(errno,
                                 _("cannot chown '%s' to (%u, %u)"),
                                 path,
                                 (unsigned int) driver->user,
                                 (unsigned int) driver->group);
            goto error;
        }

321
        if ((driver->group != (gid_t) -1 && (st.st_mode & 0060) != 0060) &&
322 323 324 325 326 327
            chmod(path, 0660) < 0) {
            virReportSystemError(errno,
                                 _("cannot chmod '%s' to 0660"),
                                 path);
            goto error;
        }
328 329 330
    }

    ls.host_id = driver->hostID;
331 332 333 334 335
    /* Stage 2: Try to register the lockspace with the daemon.  If the lockspace
     * is already registered, we should get EEXIST back in which case we can
     * just carry on with life. If EINPROGRESS is returned, we have two options:
     * either call a sanlock API that blocks us until lockspace changes state,
     * or we can fallback to polling.
336
     */
337
retry:
338
    if ((rv = sanlock_add_lockspace(&ls, 0)) < 0) {
339
        if (-rv == EINPROGRESS && --retries) {
340 341 342
#ifdef HAVE_SANLOCK_INQ_LOCKSPACE
            /* we have this function which blocks until lockspace change the
             * state. It returns 0 if lockspace has been added, -ENOENT if it
343
             * hasn't. */
344
            VIR_DEBUG("Inquiring lockspace");
345 346
            if (sanlock_inq_lockspace(&ls, SANLK_INQ_WAIT) < 0)
                VIR_DEBUG("Unable to inquire lockspace");
347 348
#else
            /* fall back to polling */
349 350
            VIR_DEBUG("Sleeping for %dms", LOCKSPACE_SLEEP);
            usleep(LOCKSPACE_SLEEP * 1000);
351
#endif
352 353
            VIR_DEBUG("Retrying to add lockspace (left %d)", retries);
            goto retry;
354
        }
355 356
        if (-rv != EEXIST) {
            if (rv <= -200)
357 358 359
                virReportError(VIR_ERR_INTERNAL_ERROR,
                               _("Unable to add lockspace %s: error %d"),
                               path, rv);
360 361 362 363
            else
                virReportSystemError(-rv,
                                     _("Unable to add lockspace %s"),
                                     path);
364
            goto error;
365 366 367 368 369 370 371
        } else {
            VIR_DEBUG("Lockspace %s is already registered", path);
        }
    } else {
        VIR_DEBUG("Lockspace %s has been registered", path);
    }

372 373
    VIR_FREE(path);
    VIR_FREE(dir);
374 375 376
    return 0;

error_unlink:
E
Eric Blake 已提交
377
    unlink(path);
378 379 380
error:
    VIR_FORCE_CLOSE(fd);
    VIR_FREE(path);
381
    VIR_FREE(dir);
382 383 384 385 386
    return -1;
}


/* Forward declaration: the error path of virLockManagerSanlockInit()
 * calls this before its definition */
static int virLockManagerSanlockDeinit(void);
387 388
static int virLockManagerSanlockInit(unsigned int version,
                                     const char *configFile,
389 390
                                     unsigned int flags)
{
391 392
    VIR_DEBUG("version=%u configFile=%s flags=%x",
              version, NULLSTR(configFile), flags);
393
    virCheckFlags(0, -1);
394 395 396 397

    if (driver)
        return 0;

398
    if (VIR_ALLOC(driver) < 0)
399 400 401
        return -1;

    driver->requireLeaseForDisks = true;
402 403
    driver->hostID = 0;
    driver->autoDiskLease = false;
404 405
    driver->user = (uid_t) -1;
    driver->group = (gid_t) -1;
406
    if (VIR_STRDUP(driver->autoDiskLeasePath, LOCALSTATEDIR "/lib/libvirt/sanlock") < 0) {
407 408 409
        VIR_FREE(driver);
        goto error;
    }
410 411

    if (virLockManagerSanlockLoadConfig(configFile) < 0)
412 413 414
        goto error;

    if (driver->autoDiskLease && !driver->hostID) {
415 416
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                       _("Automatic disk lease mode enabled, but no host ID is set"));
417 418 419
        goto error;
    }

420 421 422 423
    if (driver->autoDiskLease) {
        if (virLockManagerSanlockSetupLockspace() < 0)
            goto error;
    }
424

425
    return 0;
426 427 428 429

error:
    virLockManagerSanlockDeinit();
    return -1;
430 431 432 433
}

/*
 * virLockManagerSanlockDeinit:
 *
 * Release the global driver state, if any.  Always returns 0.
 */
static int virLockManagerSanlockDeinit(void)
{
    if (driver != NULL) {
        VIR_FREE(driver->autoDiskLeasePath);
        VIR_FREE(driver);
    }

    return 0;
}

443

444 445 446 447 448 449 450 451
/*
 * virLockManagerSanlockNew:
 * @lock: lock manager object receiving the private data
 * @type: object type; only VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN is accepted
 * @nparams: number of entries in @params
 * @params: parameters; the keys "uuid", "name", "pid", "id" and "uri"
 *          are consumed, any other key is ignored
 * @flags: must be 0
 *
 * Allocate the per-object private data and populate it from @params.
 * The "name" value is copied; the "uri" value is stored by reference.
 *
 * Returns 0 on success, -1 after reporting an error.
 */
static int virLockManagerSanlockNew(virLockManagerPtr lock,
                                    unsigned int type,
                                    size_t nparams,
                                    virLockManagerParamPtr params,
                                    unsigned int flags)
{
    virLockManagerParamPtr param;
    virLockManagerSanlockPrivatePtr priv;
    size_t i;

    virCheckFlags(0, -1);

    if (!driver) {
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                       _("Sanlock plugin is not initialized"));
        return -1;
    }

    if (type != VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Unsupported object type %d"), type);
        return -1;
    }

    if (VIR_ALLOC(priv) < 0)
        return -1;

    priv->flags = flags;

    for (i = 0; i < nparams; i++) {
        param = &params[i];

        if (STREQ(param->key, "uuid")) {
            /* Copy exactly as many bytes as vm_uuid is declared to hold */
            memcpy(priv->vm_uuid, param->value.uuid, VIR_UUID_BUFLEN);
        } else if (STREQ(param->key, "name")) {
            /* A repeated "name" must not leak the earlier copy */
            VIR_FREE(priv->vm_name);
            if (VIR_STRDUP(priv->vm_name, param->value.str) < 0)
                goto error;
        } else if (STREQ(param->key, "pid")) {
            priv->vm_pid = param->value.ui;
        } else if (STREQ(param->key, "id")) {
            priv->vm_id = param->value.ui;
        } else if (STREQ(param->key, "uri")) {
            priv->vm_uri = param->value.cstr;
        }
    }

    lock->privateData = priv;
    return 0;

error:
    VIR_FREE(priv->vm_name);
    VIR_FREE(priv);
    return -1;
}

/*
 * virLockManagerSanlockFree:
 * @lock: lock manager object whose private data is released
 *
 * Free the VM name, every recorded resource and the private structure
 * itself, then clear lock->privateData.  Does nothing when no private
 * data is attached.
 */
static void virLockManagerSanlockFree(virLockManagerPtr lock)
{
    virLockManagerSanlockPrivatePtr priv = lock->privateData;
    size_t idx = 0;

    if (priv == NULL)
        return;

    VIR_FREE(priv->vm_name);

    while (idx < priv->res_count) {
        VIR_FREE(priv->res_args[idx]);
        idx++;
    }

    VIR_FREE(priv);
    lock->privateData = NULL;
}

513 514 515 516

static int virLockManagerSanlockAddLease(virLockManagerPtr lock,
                                         const char *name,
                                         size_t nparams,
517 518
                                         virLockManagerParamPtr params,
                                         bool shared)
519 520 521 522
{
    virLockManagerSanlockPrivatePtr priv = lock->privateData;
    int ret = -1;
    struct sanlk_resource *res = NULL;
523
    size_t i;
524

525
    if (VIR_ALLOC_VAR(res, struct sanlk_disk, 1) < 0)
526
        goto cleanup;
527

528
    res->flags = shared ? SANLK_RES_SHARED : 0;
529 530
    res->num_disks = 1;
    if (!virStrcpy(res->name, name, SANLK_NAME_LEN)) {
531 532 533
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Resource name '%s' exceeds %d characters"),
                       name, SANLK_NAME_LEN);
534
        goto cleanup;
535 536 537 538 539
    }

    for (i = 0; i < nparams; i++) {
        if (STREQ(params[i].key, "path")) {
            if (!virStrcpy(res->disks[0].path, params[i].value.str, SANLK_PATH_LEN)) {
540 541 542
                virReportError(VIR_ERR_INTERNAL_ERROR,
                               _("Lease path '%s' exceeds %d characters"),
                               params[i].value.str, SANLK_PATH_LEN);
543
                goto cleanup;
544 545 546 547 548
            }
        } else if (STREQ(params[i].key, "offset")) {
            res->disks[0].offset = params[i].value.ul;
        } else if (STREQ(params[i].key, "lockspace")) {
            if (!virStrcpy(res->lockspace_name, params[i].value.str, SANLK_NAME_LEN)) {
549 550 551
                virReportError(VIR_ERR_INTERNAL_ERROR,
                               _("Resource lockspace '%s' exceeds %d characters"),
                               params[i].value.str, SANLK_NAME_LEN);
552
                goto cleanup;
553 554 555 556 557 558
            }
        }
    }

    priv->res_args[priv->res_count] = res;
    priv->res_count++;
559 560 561 562 563 564 565 566 567 568 569 570 571 572 573

    ret = 0;

cleanup:
    if (ret == -1)
        VIR_FREE(res);
    return ret;
}




static int virLockManagerSanlockAddDisk(virLockManagerPtr lock,
                                        const char *name,
                                        size_t nparams,
574 575
                                        virLockManagerParamPtr params ATTRIBUTE_UNUSED,
                                        bool shared)
576 577 578 579 580
{
    virLockManagerSanlockPrivatePtr priv = lock->privateData;
    int ret = -1;
    struct sanlk_resource *res = NULL;
    char *path = NULL;
581
    char *hash = NULL;
582 583

    if (nparams) {
584 585
        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                       _("Unexpected lock parameters for disk resource"));
586 587 588
        return -1;
    }

589
    if (VIR_ALLOC_VAR(res, struct sanlk_disk, 1) < 0)
590 591
        goto cleanup;

592
    res->flags = shared ? SANLK_RES_SHARED : 0;
593
    res->num_disks = 1;
594 595 596 597 598 599
    if (virCryptoHashString(VIR_CRYPTO_HASH_MD5, name, &hash) < 0)
        goto cleanup;
    if (!virStrcpy(res->name, hash, SANLK_NAME_LEN)) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("MD5 hash '%s' unexpectedly larger than %d characters"),
                       hash, (SANLK_NAME_LEN - 1));
600
        goto cleanup;
601
    }
602 603

    if (virAsprintf(&path, "%s/%s",
604
                    driver->autoDiskLeasePath, res->name) < 0)
605 606
        goto cleanup;
    if (!virStrcpy(res->disks[0].path, path, SANLK_PATH_LEN)) {
607 608 609
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Lease path '%s' exceeds %d characters"),
                       path, SANLK_PATH_LEN);
610 611 612 613 614 615
        goto cleanup;
    }

    if (!virStrcpy(res->lockspace_name,
                   VIR_LOCK_MANAGER_SANLOCK_AUTO_DISK_LOCKSPACE,
                   SANLK_NAME_LEN)) {
616 617 618
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Resource lockspace '%s' exceeds %d characters"),
                       VIR_LOCK_MANAGER_SANLOCK_AUTO_DISK_LOCKSPACE, SANLK_NAME_LEN);
619 620 621 622 623 624 625 626 627 628 629 630
        goto cleanup;
    }

    priv->res_args[priv->res_count] = res;
    priv->res_count++;

    ret = 0;

cleanup:
    if (ret == -1)
        VIR_FREE(res);
    VIR_FREE(path);
631
    VIR_FREE(hash);
632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652
    return ret;
}


static int virLockManagerSanlockCreateLease(struct sanlk_resource *res)
{
    int fd = -1;
    struct stat st;
    int rv;

    if (stat(res->disks[0].path, &st) < 0) {
        VIR_DEBUG("Lockspace %s does not yet exist", res->disks[0].path);
        if ((fd = open(res->disks[0].path, O_WRONLY|O_CREAT|O_EXCL, 0600)) < 0) {
            if (errno != EEXIST) {
                virReportSystemError(errno,
                                     _("Unable to create lockspace %s"),
                                     res->disks[0].path);
                return -1;
            }
            VIR_DEBUG("Someone else just created lockspace %s", res->disks[0].path);
        } else {
653
            /* chown() the path to make sure sanlock can access it */
654
            if ((driver->user != (uid_t) -1 || driver->group != (gid_t) -1) &&
655 656 657 658 659 660 661 662 663
                (fchown(fd, driver->user, driver->group) < 0)) {
                virReportSystemError(errno,
                                     _("cannot chown '%s' to (%u, %u)"),
                                     res->disks[0].path,
                                     (unsigned int) driver->user,
                                     (unsigned int) driver->group);
                goto error_unlink;
            }

664
            if ((rv = sanlock_align(&res->disks[0])) < 0) {
665
                if (rv <= -200)
666 667 668
                    virReportError(VIR_ERR_INTERNAL_ERROR,
                                   _("Unable to query sector size %s: error %d"),
                                   res->disks[0].path, rv);
669 670 671 672 673 674 675 676 677 678
                else
                    virReportSystemError(-rv,
                                         _("Unable to query sector size %s"),
                                         res->disks[0].path);
                goto error_unlink;
            }

            /*
             * Pre allocate enough data for 1 block of leases at preferred alignment
             */
E
Eric Blake 已提交
679
            if (safezero(fd, 0, rv) < 0) {
680 681 682 683 684 685 686 687 688 689 690 691 692
                virReportSystemError(errno,
                                     _("Unable to allocate lease %s"),
                                     res->disks[0].path);
                goto error_unlink;
            }

            if (VIR_CLOSE(fd) < 0) {
                virReportSystemError(errno,
                                     _("Unable to save lease %s"),
                                     res->disks[0].path);
                goto error_unlink;
            }

693
            if ((rv = sanlock_init(NULL, res, 0, 0)) < 0) {
694
                if (rv <= -200)
695 696 697
                    virReportError(VIR_ERR_INTERNAL_ERROR,
                                   _("Unable to initialize lease %s: error %d"),
                                   res->disks[0].path, rv);
698 699 700 701 702 703 704 705 706 707
                else
                    virReportSystemError(-rv,
                                         _("Unable to initialize lease %s"),
                                         res->disks[0].path);
                goto error_unlink;
            }
            VIR_DEBUG("Lease %s has been initialized", res->disks[0].path);
        }
    }

708 709
    return 0;

710 711 712
error_unlink:
    unlink(res->disks[0].path);
    VIR_FORCE_CLOSE(fd);
713 714 715
    return -1;
}

716 717 718 719 720 721 722 723 724 725 726 727 728 729

/*
 * virLockManagerSanlockAddResource:
 * @lock: lock manager object
 * @type: resource type (disk, lease, or anything else)
 * @name: resource name
 * @nparams: number of entries in @params
 * @params: resource parameters
 * @flags: VIR_LOCK_MANAGER_RESOURCE_READONLY and/or
 *         VIR_LOCK_MANAGER_RESOURCE_SHARED
 *
 * Record a resource on the lock object.  Read-only resources are
 * accepted without taking any lock.  Disks get an automatic lease
 * when that mode is enabled; otherwise a non-shared disk is only
 * noted so that a missing lease can be detected later.  Explicit
 * leases are recorded as given and other types are ignored.
 *
 * Returns 0 on success, -1 after reporting an error.
 */
static int virLockManagerSanlockAddResource(virLockManagerPtr lock,
                                            unsigned int type,
                                            const char *name,
                                            size_t nparams,
                                            virLockManagerParamPtr params,
                                            unsigned int flags)
{
    virLockManagerSanlockPrivatePtr priv = lock->privateData;
    bool shared;

    virCheckFlags(VIR_LOCK_MANAGER_RESOURCE_READONLY |
                  VIR_LOCK_MANAGER_RESOURCE_SHARED, -1);

    if (priv->res_count == SANLK_MAX_RESOURCES) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Too many resources %d for object"),
                       SANLK_MAX_RESOURCES);
        return -1;
    }

    /* Treat R/O resources as a no-op lock request */
    if (flags & VIR_LOCK_MANAGER_RESOURCE_READONLY)
        return 0;

    shared = !!(flags & VIR_LOCK_MANAGER_RESOURCE_SHARED);

    if (type == VIR_LOCK_MANAGER_RESOURCE_TYPE_LEASE) {
        if (virLockManagerSanlockAddLease(lock, name, nparams, params,
                                          shared) < 0)
            return -1;
        return 0;
    }

    /* Ignore other resources, without error */
    if (type != VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK)
        return 0;

    if (!driver->autoDiskLease) {
        /* Read-only was handled above, so "not shared" here means a
         * read/write exclusive disk.  Ignore disk resources without error */
        if (!shared)
            priv->hasRWDisks = true;
        return 0;
    }

    if (virLockManagerSanlockAddDisk(lock, name, nparams, params,
                                     shared) < 0)
        return -1;

    /* The disk just added is the last entry of the resource list */
    if (virLockManagerSanlockCreateLease(priv->res_args[priv->res_count - 1]) < 0)
        return -1;

    return 0;
}

771
#if HAVE_SANLOCK_KILLPATH
772 773 774 775 776 777 778 779 780 781 782 783
/*
 * virLockManagerSanlockRegisterKillscript:
 * @sock: socket passed to sanlock_killpath()
 * @vmuri: URI passed to the helper as first argument
 * @uuidstr: domain UUID string passed as second argument
 * @action: lock failure action, passed by name as third argument
 *
 * Register the libvirt sanlock helper, together with its escaped
 * argument string, as the program to run on lock failure.  The
 * default action registers nothing; only poweroff and pause are
 * supported.
 *
 * Returns 0 on success or when there is nothing to register, -1
 * after reporting an error.
 */
static int
virLockManagerSanlockRegisterKillscript(int sock,
                                        const char *vmuri,
                                        const char *uuidstr,
                                        virDomainLockFailureAction action)
{
    virBuffer argbuf = VIR_BUFFER_INITIALIZER;
    char *helper;
    char *helperArgs = NULL;
    int ret = -1;
    int rv;

    switch (action) {
    case VIR_DOMAIN_LOCK_FAILURE_DEFAULT:
        return 0;

    case VIR_DOMAIN_LOCK_FAILURE_POWEROFF:
    case VIR_DOMAIN_LOCK_FAILURE_PAUSE:
        break;

    case VIR_DOMAIN_LOCK_FAILURE_RESTART:
    case VIR_DOMAIN_LOCK_FAILURE_IGNORE:
    case VIR_DOMAIN_LOCK_FAILURE_LAST:
        /* Nothing has been allocated yet, so return directly */
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                       _("Failure action %s is not supported by sanlock"),
                       virDomainLockFailureTypeToString(action));
        return -1;
    }

    /* Space-separated arguments; backslashes and spaces inside each
     * value are escaped with a backslash */
    virBufferEscape(&argbuf, '\\', "\\ ", "%s", vmuri);
    virBufferAddLit(&argbuf, " ");
    virBufferEscape(&argbuf, '\\', "\\ ", "%s", uuidstr);
    virBufferAddLit(&argbuf, " ");
    virBufferEscape(&argbuf, '\\', "\\ ", "%s",
                    virDomainLockFailureTypeToString(action));

    if (virBufferError(&argbuf)) {
        virBufferFreeAndReset(&argbuf);
        virReportOOMError();
        goto cleanup;
    }

    /* Unfortunately, sanlock_killpath() does not use const for either
     * path or args even though it will just copy them into its own
     * buffers.
     */
    helper = (char *) VIR_LOCK_MANAGER_SANLOCK_KILLPATH;
    helperArgs = virBufferContentAndReset(&argbuf);

    VIR_DEBUG("Register sanlock killpath: %s %s", helper, helperArgs);

    /* sanlock_killpath() would just crop the strings */
    if (strlen(helper) >= SANLK_HELPER_PATH_LEN) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Sanlock helper path is longer than %d: '%s'"),
                       SANLK_HELPER_PATH_LEN - 1, helper);
        goto cleanup;
    }
    if (strlen(helperArgs) >= SANLK_HELPER_ARGS_LEN) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Sanlock helper arguments are longer than %d:"
                         " '%s'"),
                       SANLK_HELPER_ARGS_LEN - 1, helperArgs);
        goto cleanup;
    }

    rv = sanlock_killpath(sock, 0, helper, helperArgs);
    if (rv < 0) {
        if (rv <= -200) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Failed to register lock failure action:"
                             " error %d"), rv);
        } else {
            virReportSystemError(-rv, "%s",
                                 _("Failed to register lock failure"
                                   " action"));
        }
        goto cleanup;
    }

    ret = 0;

cleanup:
    VIR_FREE(helperArgs);
    return ret;
}
857 858 859 860 861 862 863 864 865 866 867 868
#else
/* Variant built when HAVE_SANLOCK_KILLPATH is not set: registering a
 * lock failure action is unsupported, so always report an error and
 * return -1 */
static int
virLockManagerSanlockRegisterKillscript(int sock ATTRIBUTE_UNUSED,
                                        const char *vmuri ATTRIBUTE_UNUSED,
                                        const char *uuidstr ATTRIBUTE_UNUSED,
                                        virDomainLockFailureAction action ATTRIBUTE_UNUSED)
{
    virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                   _("sanlock is too old to support lock failure action"));
    return -1;
}
#endif
869

870 871
static int virLockManagerSanlockAcquire(virLockManagerPtr lock,
                                        const char *state,
872
                                        unsigned int flags,
873
                                        virDomainLockFailureAction action,
874
                                        int *fd)
875 876 877 878 879 880 881 882
{
    virLockManagerSanlockPrivatePtr priv = lock->privateData;
    struct sanlk_options *opt;
    struct sanlk_resource **res_args;
    int res_count;
    bool res_free = false;
    int sock = -1;
    int rv;
883
    size_t i;
884

885 886
    virCheckFlags(VIR_LOCK_MANAGER_ACQUIRE_RESTRICT |
                  VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY, -1);
887 888

    if (priv->res_count == 0 &&
889 890
        priv->hasRWDisks &&
        driver->requireLeaseForDisks) {
891 892
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("Read/write, exclusive access, disks were present, but no leases specified"));
893 894 895
        return -1;
    }

896
    if (VIR_ALLOC(opt) < 0)
897 898
        return -1;

899 900 901 902
    /* sanlock doesn't use owner_name for anything, so it's safe to take just
     * the first SANLK_NAME_LEN - 1 characters from vm_name */
    ignore_value(virStrncpy(opt->owner_name, priv->vm_name,
                            SANLK_NAME_LEN - 1, SANLK_NAME_LEN));
903

904
    if (state && STRNEQ(state, "")) {
905 906 907 908
        if ((rv = sanlock_state_to_args((char *)state,
                                        &res_count,
                                        &res_args)) < 0) {
            if (rv <= -200)
909 910 911
                virReportError(VIR_ERR_INTERNAL_ERROR,
                               _("Unable to parse lock state %s: error %d"),
                               state, rv);
912 913 914 915 916 917 918 919 920 921 922 923 924 925 926
            else
                virReportSystemError(-rv,
                                     _("Unable to parse lock state %s"),
                                     state);
            goto error;
        }
        res_free = true;
    } else {
        res_args = priv->res_args;
        res_count = priv->res_count;
    }

    /* We only initialize 'sock' if we are in the real
     * child process and we need it to be inherited
     *
927
     * If sock == -1, then sanlock auto-open/closes a
928 929
     * temporary sock
     */
930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949
    if (priv->vm_pid == getpid()) {
        VIR_DEBUG("Register sanlock %d", flags);
        if ((sock = sanlock_register()) < 0) {
            if (sock <= -200)
                virReportError(VIR_ERR_INTERNAL_ERROR,
                               _("Failed to open socket to sanlock daemon: error %d"),
                               sock);
            else
                virReportSystemError(-sock, "%s",
                                     _("Failed to open socket to sanlock daemon"));
            goto error;
        }

        if (action != VIR_DOMAIN_LOCK_FAILURE_DEFAULT) {
            char uuidstr[VIR_UUID_STRING_BUFLEN];
            virUUIDFormat(priv->vm_uuid, uuidstr);
            if (virLockManagerSanlockRegisterKillscript(sock, priv->vm_uri,
                                                        uuidstr, action) < 0)
                goto error;
        }
950 951 952 953 954 955 956 957
    }

    if (!(flags & VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY)) {
        VIR_DEBUG("Acquiring object %u", priv->res_count);
        if ((rv = sanlock_acquire(sock, priv->vm_pid, 0,
                                  priv->res_count, priv->res_args,
                                  opt)) < 0) {
            if (rv <= -200)
958 959
                virReportError(VIR_ERR_INTERNAL_ERROR,
                               _("Failed to acquire lock: error %d"), rv);
960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978
            else
                virReportSystemError(-rv, "%s",
                                     _("Failed to acquire lock"));
            goto error;
        }
    }

    VIR_FREE(opt);

    /*
     * We are *intentionally* "leaking" sock file descriptor
     * because we want it to be inherited by QEMU. When the
     * sock FD finally closes upon QEMU exit (or crash) then
     * sanlock will notice EOF and release the lock
     */
    if (sock != -1 &&
        virSetInherit(sock, true) < 0)
        goto error;

979 980 981
    if (flags & VIR_LOCK_MANAGER_ACQUIRE_RESTRICT) {
        if ((rv = sanlock_restrict(sock, SANLK_RESTRICT_ALL)) < 0) {
            if (rv <= -200)
982 983
                virReportError(VIR_ERR_INTERNAL_ERROR,
                               _("Failed to restrict process: error %d"), rv);
984 985 986 987 988 989 990
            else
                virReportSystemError(-rv, "%s",
                                     _("Failed to restrict process"));
            goto error;
        }
    }

991 992 993
    VIR_DEBUG("Acquire completed fd=%d", sock);

    if (res_free) {
994
        for (i = 0; i < res_count; i++) {
995 996 997 998 999
            VIR_FREE(res_args[i]);
        }
        VIR_FREE(res_args);
    }

1000 1001 1002
    if (fd)
        *fd = sock;

1003 1004 1005 1006
    return 0;

error:
    if (res_free) {
1007
        for (i = 0; i < res_count; i++) {
1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022
            VIR_FREE(res_args[i]);
        }
        VIR_FREE(res_args);
    }
    VIR_FREE(opt);
    VIR_FORCE_CLOSE(sock);
    return -1;
}


/*
 * virLockManagerSanlockRelease:
 * @lock: lock manager object; lock->privateData holds the sanlock state
 * @state: if non-NULL, receives the lock state string obtained from
 *         sanlock_inquire() before the release; an empty string is
 *         freed and replaced by NULL
 * @flags: must be 0
 *
 * Optionally inquires the current lock state of priv->vm_pid and then
 * releases the resources stored in the private data.
 *
 * Returns 0 on success, -1 on failure. If the inquire step succeeded
 * but the release failed, *state keeps whatever the inquire step
 * stored there.
 */
static int virLockManagerSanlockRelease(virLockManagerPtr lock,
                                        char **state,
                                        unsigned int flags)
{
    virLockManagerSanlockPrivatePtr priv = lock->privateData;
    int res_count = priv->res_count;
    int rv;

    virCheckFlags(0, -1);

    if (state) {
        /* NOTE(review): sanlock_inquire() overwrites res_count, and that
         * value is then used together with priv->res_args in the release
         * call below. Confirm the two can never disagree. */
        if ((rv = sanlock_inquire(-1, priv->vm_pid, 0, &res_count, state)) < 0) {
            if (rv <= -200)
                virReportError(VIR_ERR_INTERNAL_ERROR,
                               _("Failed to inquire lock: error %d"), rv);
            else
                virReportSystemError(-rv, "%s",
                                     _("Failed to inquire lock"));
            return -1;
        }

        /* Normalize "no state" to NULL rather than an empty string */
        if (STREQ_NULLABLE(*state, ""))
            VIR_FREE(*state);
    }

    if ((rv = sanlock_release(-1, priv->vm_pid, 0, res_count,
                              priv->res_args)) < 0) {
        if (rv <= -200)
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Failed to release lock: error %d"), rv);
        else
            virReportSystemError(-rv, "%s",
                                 _("Failed to release lock"));
        return -1;
    }

    return 0;
}

/*
 * virLockManagerSanlockInquire:
 * @lock: lock manager object; lock->privateData holds the sanlock state
 * @state: receives the lock state string obtained from
 *         sanlock_inquire(); must not be NULL. An empty string is
 *         freed and replaced by NULL.
 * @flags: must be 0
 *
 * Queries sanlock for the lock state of priv->vm_pid.
 *
 * Returns 0 on success, -1 on failure.
 */
static int virLockManagerSanlockInquire(virLockManagerPtr lock,
                                        char **state,
                                        unsigned int flags)
{
    virLockManagerSanlockPrivatePtr priv = lock->privateData;
    /* sanlock_inquire() needs somewhere to store the count; the value
     * itself is not used afterwards */
    int res_count = 0;
    int rv;

    virCheckFlags(0, -1);

    if (!state) {
        /* Never hand a non-literal to the format parameter */
        virReportError(VIR_ERR_INVALID_ARG, "%s", __FUNCTION__);
        return -1;
    }

    VIR_DEBUG("pid=%d", priv->vm_pid);

    if ((rv = sanlock_inquire(-1, priv->vm_pid, 0, &res_count, state)) < 0) {
        if (rv <= -200)
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("Failed to inquire lock: error %d"), rv);
        else
            virReportSystemError(-rv, "%s",
                                 _("Failed to inquire lock"));
        return -1;
    }

    /* Normalize "no state" to NULL rather than an empty string */
    if (STREQ_NULLABLE(*state, ""))
        VIR_FREE(*state);

    return 0;
}

/*
 * Driver table for the sanlock lock manager: binds the
 * virLockManagerSanlock* functions to the virLockDriver callbacks.
 */
virLockDriver virLockDriverImpl = {
    .version        = VIR_LOCK_MANAGER_VERSION,
    .flags          = VIR_LOCK_MANAGER_USES_STATE,

    /* init / deinit */
    .drvInit        = virLockManagerSanlockInit,
    .drvDeinit      = virLockManagerSanlockDeinit,

    /* new / free */
    .drvNew         = virLockManagerSanlockNew,
    .drvFree        = virLockManagerSanlockFree,

    /* add resource */
    .drvAddResource = virLockManagerSanlockAddResource,

    /* acquire / release / inquire */
    .drvAcquire     = virLockManagerSanlockAcquire,
    .drvRelease     = virLockManagerSanlockRelease,
    .drvInquire     = virLockManagerSanlockInquire,
};