/*
 * Copyright (C) 2010-2014 Red Hat, Inc.
 * Copyright IBM Corp. 2008
 *
 * lxc_controller.c: linux container process controller
 *
 * Authors:
 *  David L. Leskovec <dlesko at linux.vnet.ibm.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library.  If not, see
 * <http://www.gnu.org/licenses/>.
 */

#include <config.h>

#include <sys/epoll.h>
28 29
#include <sys/wait.h>
#include <sys/socket.h>
30 31
#include <sys/types.h>
#include <sys/un.h>
32
#include <sys/personality.h>
33
#include <unistd.h>
34
#include <paths.h>
35
#include <errno.h>
36 37
#include <fcntl.h>
#include <signal.h>
38
#include <getopt.h>
39
#include <sys/mount.h>
E
Eric Blake 已提交
40
#include <locale.h>
41 42
#include <grp.h>
#include <sys/stat.h>
43
#include <time.h>
44

45
#if WITH_CAPNG
46
# include <cap-ng.h>
D
Daniel P. Berrange 已提交
47 48
#endif

49
#include "virerror.h"
50
#include "virlog.h"
51 52

#include "lxc_conf.h"
53
#include "lxc_container.h"
54
#include "lxc_cgroup.h"
55
#include "lxc_monitor_protocol.h"
G
Gao feng 已提交
56
#include "lxc_fuse.h"
57 58
#include "virnetdev.h"
#include "virnetdevveth.h"
59
#include "viralloc.h"
E
Eric Blake 已提交
60
#include "virfile.h"
61
#include "virpidfile.h"
62
#include "vircommand.h"
63
#include "nodeinfo.h"
64
#include "virrandom.h"
65
#include "virprocess.h"
66
#include "virnuma.h"
67
#include "virdbus.h"
68
#include "rpc/virnetserver.h"
69
#include "virstring.h"
70

71 72
#define VIR_FROM_THIS VIR_FROM_LXC

73 74
VIR_LOG_INIT("lxc.lxc_controller");

75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94
typedef struct _virLXCControllerConsole virLXCControllerConsole;
typedef virLXCControllerConsole *virLXCControllerConsolePtr;
struct _virLXCControllerConsole {
    int hostWatch;
    int hostFd;  /* PTY FD in the host OS */
    bool hostClosed;
    int hostEpoll;

    int contWatch;
    int contFd;  /* PTY FD in the container */
    bool contClosed;
    int contEpoll;

    int epollWatch;
    int epollFd; /* epoll FD for dealing with EOF */

    size_t fromHostLen;
    char fromHostBuf[1024];
    size_t fromContLen;
    char fromContBuf[1024];
95 96

    virNetServerPtr server;
97 98
};

99 100 101 102
typedef struct _virLXCController virLXCController;
typedef virLXCController *virLXCControllerPtr;
struct _virLXCController {
    char *name;
103
    virDomainObjPtr vm;
104
    virDomainDefPtr def;
105

106 107
    int handshakeFd;

108 109
    pid_t initpid;

110 111
    size_t nveths;
    char **veths;
112

113 114 115
    size_t nnicindexes;
    int *nicindexes;

116 117 118
    size_t npassFDs;
    int *passFDs;

119 120
    size_t nconsoles;
    virLXCControllerConsolePtr consoles;
121
    char *devptmx;
122 123 124

    size_t nloopDevs;
    int *loopDevFds;
125 126

    virSecurityManagerPtr securityManager;
127

128 129
    /* Server socket */
    virNetServerPtr server;
130
    bool firstClient;
131 132 133 134
    virNetServerClientPtr client;
    virNetServerProgramPtr prog;
    bool inShutdown;
    int timerShutdown;
G
Gao feng 已提交
135

136 137
    virCgroupPtr cgroup;

G
Gao feng 已提交
138
    virLXCFusePtr fuse;
139 140
};

141 142
#include "lxc_controller_dispatch.h"

143
/* Forward declarations for functions referenced before their definitions. */
static int virLXCControllerEventSendInit(virLXCControllerPtr ctrl,
                                         pid_t initpid);
static void virLXCControllerFree(virLXCControllerPtr ctrl);
146

147 148 149 150 151 152 153 154 155
/*
 * Timer callback that asks the controller's server to quit.
 *
 * @timer:  timer id (unused)
 * @opaque: the virLXCControllerPtr the timer was registered with
 */
static void virLXCControllerQuitTimer(int timer ATTRIBUTE_UNUSED, void *opaque)
{
    virNetServerPtr srv = ((virLXCControllerPtr)opaque)->server;

    VIR_DEBUG("Triggering event loop quit");
    virNetServerQuit(srv);
}


156 157 158 159
static virLXCControllerPtr virLXCControllerNew(const char *name)
{
    virLXCControllerPtr ctrl = NULL;
    virCapsPtr caps = NULL;
160
    virDomainXMLOptionPtr xmlopt = NULL;
161 162
    char *configFile = NULL;

163
    if (VIR_ALLOC(ctrl) < 0)
164
        goto error;
165

166
    ctrl->timerShutdown = -1;
167
    ctrl->firstClient = true;
168

169 170
    if (VIR_STRDUP(ctrl->name, name) < 0)
        goto error;
171

172
    if (!(caps = virLXCDriverCapsInit(NULL)))
173 174
        goto error;

175
    if (!(xmlopt = lxcDomainXMLConfInit()))
176 177
        goto error;

178 179 180 181
    if ((configFile = virDomainConfigFile(LXC_STATE_DIR,
                                          ctrl->name)) == NULL)
        goto error;

182 183 184
    if ((ctrl->vm = virDomainObjParseFile(configFile,
                                          caps, xmlopt,
                                          0)) == NULL)
185
        goto error;
186
    ctrl->def = ctrl->vm->def;
187

188 189 190 191 192
    if ((ctrl->timerShutdown = virEventAddTimeout(-1,
                                                  virLXCControllerQuitTimer, ctrl,
                                                  NULL)) < 0)
        goto error;

193
 cleanup:
194
    VIR_FREE(configFile);
195
    virObjectUnref(caps);
196
    virObjectUnref(xmlopt);
197 198
    return ctrl;

199
 error:
200 201 202 203 204
    virLXCControllerFree(ctrl);
    ctrl = NULL;
    goto cleanup;
}

205

206
/*
 * Closes every descriptor recorded in ctrl->loopDevFds.  The array itself
 * is left allocated.
 *
 * Always returns 0.
 */
static int virLXCControllerCloseLoopDevices(virLXCControllerPtr ctrl)
{
    size_t idx = 0;

    while (idx < ctrl->nloopDevs) {
        VIR_FORCE_CLOSE(ctrl->loopDevFds[idx]);
        idx++;
    }

    return 0;
}


217 218 219 220 221
/*
 * If an init process is recorded, closes the loop device descriptors,
 * aborts that process and forgets its PID.  Does nothing when
 * ctrl->initpid is 0.
 */
static void virLXCControllerStopInit(virLXCControllerPtr ctrl)
{
    if (ctrl->initpid != 0) {
        virLXCControllerCloseLoopDevices(ctrl);
        virProcessAbort(ctrl->initpid);
        ctrl->initpid = 0;
    }
}


228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243
/* Drops one event-loop watch (when registered) and closes its descriptor. */
static void virLXCControllerConsoleReleaseOne(int watch, int *fd)
{
    if (watch != -1)
        virEventRemoveHandle(watch);
    VIR_FORCE_CLOSE(*fd);
}


/*
 * Tears down a console: host side first, then container side, then the
 * epoll descriptor.  The watch ids stored in @console are not reset.
 */
static void virLXCControllerConsoleClose(virLXCControllerConsolePtr console)
{
    virLXCControllerConsoleReleaseOne(console->hostWatch, &console->hostFd);
    virLXCControllerConsoleReleaseOne(console->contWatch, &console->contFd);
    virLXCControllerConsoleReleaseOne(console->epollWatch, &console->epollFd);
}


G
Gao feng 已提交
244 245 246 247 248 249 250
/*
 * Releases the controller's FUSE state by handing &ctrl->fuse to
 * lxcFreeFuse().
 */
static void
virLXCControllerFreeFuse(virLXCControllerPtr ctrl)
{
    /* No "return <expr>;" here: ISO C does not allow a return statement
     * with an expression in a function returning void. */
    lxcFreeFuse(&ctrl->fuse);
}


251 252
/*
 * Releases everything owned by @ctrl, then @ctrl itself.
 * Passing NULL is a no-op.
 */
static void virLXCControllerFree(virLXCControllerPtr ctrl)
{
    size_t i;

    if (!ctrl)
        return;

    virLXCControllerStopInit(ctrl);

    /* virLXCControllerStopInit() returns early when no init PID is
     * recorded, and nothing else releases the descriptor array, so make
     * sure the loop device descriptors are closed and the array freed.
     * NOTE(review): relies on VIR_FORCE_CLOSE being harmless on a
     * descriptor it has already closed. */
    virLXCControllerCloseLoopDevices(ctrl);
    VIR_FREE(ctrl->loopDevFds);
    ctrl->nloopDevs = 0;

    virObjectUnref(ctrl->securityManager);

    for (i = 0; i < ctrl->nveths; i++)
        VIR_FREE(ctrl->veths[i]);
    VIR_FREE(ctrl->veths);
    VIR_FREE(ctrl->nicindexes);

    for (i = 0; i < ctrl->npassFDs; i++)
        VIR_FORCE_CLOSE(ctrl->passFDs[i]);
    VIR_FREE(ctrl->passFDs);

    for (i = 0; i < ctrl->nconsoles; i++)
        virLXCControllerConsoleClose(&(ctrl->consoles[i]));
    VIR_FREE(ctrl->consoles);

    VIR_FREE(ctrl->devptmx);

    virObjectUnref(ctrl->vm);
    VIR_FREE(ctrl->name);

    if (ctrl->timerShutdown != -1)
        virEventRemoveTimeout(ctrl->timerShutdown);

    virObjectUnref(ctrl->server);
    virLXCControllerFreeFuse(ctrl);

    virCgroupFree(&ctrl->cgroup);

    /* This must always be the last thing to be closed */
    VIR_FORCE_CLOSE(ctrl->handshakeFd);
    VIR_FREE(ctrl);
}

293

294 295 296
/*
 * Appends a console entry for the host-side PTY @hostFd.  The container
 * side, the epoll descriptor and all watch ids start out as -1.
 *
 * Returns 0 on success, -1 if the console array could not be grown.
 */
static int virLXCControllerAddConsole(virLXCControllerPtr ctrl,
                                      int hostFd)
{
    virLXCControllerConsolePtr slot;

    if (VIR_EXPAND_N(ctrl->consoles, ctrl->nconsoles, 1) < 0)
        return -1;

    slot = &ctrl->consoles[ctrl->nconsoles - 1];

    slot->server = ctrl->server;

    slot->hostFd = hostFd;
    slot->hostWatch = -1;

    slot->contFd = -1;
    slot->contWatch = -1;

    slot->epollFd = -1;
    slot->epollWatch = -1;

    return 0;
}


/*
 * Switches both PTY descriptors of @console to non-blocking mode; the
 * container side is only attempted when the host side succeeded.
 *
 * Returns 0 on success, -1 (with an error reported) otherwise.
 */
static int virLXCControllerConsoleSetNonblocking(virLXCControllerConsolePtr console)
{
    if (virSetBlocking(console->hostFd, false) >= 0 &&
        virSetBlocking(console->contFd, false) >= 0)
        return 0;

    virReportSystemError(errno, "%s",
                         _("Unable to set console file descriptor non-blocking"));
    return -1;
}


325 326 327 328 329 330 331 332 333 334 335 336
/*
 * Sends the "continue" message over the handshake descriptor and closes
 * that descriptor once the message has gone out.
 *
 * Returns 0 on success, -1 (with an error reported) if sending failed;
 * the descriptor is left open in that case.
 */
static int virLXCControllerDaemonHandshake(virLXCControllerPtr ctrl)
{
    if (lxcContainerSendContinue(ctrl->handshakeFd) >= 0) {
        VIR_FORCE_CLOSE(ctrl->handshakeFd);
        return 0;
    }

    virReportSystemError(errno, "%s",
                         _("error sending continue signal to daemon"));
    return -1;
}


337 338 339
/*
 * Checks that the number of veths given to the controller matches the
 * number of network interfaces in the domain definition.
 *
 * Returns 0 when they match, -1 (with an error reported) otherwise.
 */
static int virLXCControllerValidateNICs(virLXCControllerPtr ctrl)
{
    if (ctrl->def->nnets == ctrl->nveths)
        return 0;

    virReportError(VIR_ERR_INTERNAL_ERROR,
                   _("expecting %zu veths, but got %zu"),
                   ctrl->def->nnets, ctrl->nveths);
    return -1;
}


350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394
/*
 * Collects the interface index of every bridge- or network-type NIC that
 * has an interface name, appending each to ctrl->nicindexes.  All other
 * NIC types, and NICs without a name, are skipped.
 *
 * Returns 0 on success, -1 if an index lookup or array growth failed.
 */
static int virLXCControllerGetNICIndexes(virLXCControllerPtr ctrl)
{
    size_t n;

    VIR_DEBUG("Getting nic indexes");
    for (n = 0; n < ctrl->def->nnets; n++) {
        int kind = ctrl->def->nets[n]->type;
        int ifindex = -1;

        /* Only these two types contribute an index */
        if (kind != VIR_DOMAIN_NET_TYPE_BRIDGE &&
            kind != VIR_DOMAIN_NET_TYPE_NETWORK)
            continue;

        if (ctrl->def->nets[n]->ifname == NULL)
            continue;

        if (virNetDevGetIndex(ctrl->def->nets[n]->ifname, &ifindex) < 0)
            return -1;

        if (VIR_EXPAND_N(ctrl->nicindexes, ctrl->nnicindexes, 1) < 0)
            return -1;

        VIR_DEBUG("Index %d for %s", ifindex,
                  ctrl->def->nets[n]->ifname);
        ctrl->nicindexes[ctrl->nnicindexes - 1] = ifindex;
    }

    return 0;
}


395 396 397
/*
 * Checks that the number of console descriptors given to the controller
 * matches the number of consoles in the domain definition.
 *
 * Returns 0 when they match, -1 (with an error reported) otherwise.
 */
static int virLXCControllerValidateConsoles(virLXCControllerPtr ctrl)
{
    if (ctrl->def->nconsoles == ctrl->nconsoles)
        return 0;

    virReportError(VIR_ERR_INTERNAL_ERROR,
                   _("expecting %zu consoles, but got %zu tty file handlers"),
                   ctrl->def->nconsoles, ctrl->nconsoles);
    return -1;
}


408
/*
 * Associates the file behind @fs with a loop device and rewrites @fs to
 * be a block-type filesystem whose source is that device, so the rest of
 * container setup 'just works'.
 *
 * Returns the loop device descriptor on success, -1 on failure.
 */
static int virLXCControllerSetupLoopDeviceFS(virDomainFSDefPtr fs)
{
    char *loopName = NULL;
    int loopFd = virFileLoopDeviceAssociate(fs->src, &loopName);

    if (loopFd < 0)
        return -1;

    VIR_DEBUG("Changing fs %s to use type=block for dev %s",
              fs->src, loopName);

    fs->type = VIR_DOMAIN_FS_TYPE_BLOCK;
    VIR_FREE(fs->src);
    fs->src = loopName;     /* ownership of the name moves to @fs */

    return loopFd;
}


431 432 433 434
/*
 * Associates the file behind @disk with a loop device and rewrites @disk
 * to be a block-type disk whose source is that device, so the rest of
 * container setup 'just works'.
 *
 * Returns the value of the loop descriptor variable: the open descriptor
 * on success, or whatever VIR_FORCE_CLOSE leaves in it after a failed
 * source update (presumably -1 -- confirm against the macro), or -1 when
 * the loop association itself failed.
 */
static int virLXCControllerSetupLoopDeviceDisk(virDomainDiskDefPtr disk)
{
    const char *path = virDomainDiskGetSource(disk);
    char *loopName = NULL;
    bool sourceUpdated = true;
    int loopFd = virFileLoopDeviceAssociate(path, &loopName);

    if (loopFd < 0)
        return -1;

    VIR_DEBUG("Changing disk %s to use type=block for dev %s",
              path, loopName);

    virDomainDiskSetType(disk, VIR_STORAGE_TYPE_BLOCK);
    if (virDomainDiskSetSource(disk, loopName) < 0)
        sourceUpdated = false;

    VIR_FREE(loopName);
    if (!sourceUpdated)
        VIR_FORCE_CLOSE(loopFd);

    return loopFd;
}


464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479
/*
 * Associates the image behind @fs with an NBD device and rewrites @fs to
 * be a block-type filesystem whose source is that device, so the rest of
 * container setup 'just works'.  An explicit image format is required.
 *
 * Returns 0 on success, -1 on failure.
 */
static int virLXCControllerSetupNBDDeviceFS(virDomainFSDefPtr fs)
{
    char *nbdDev;

    if (fs->format <= VIR_STORAGE_FILE_NONE) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("An explicit disk format must be specified"));
        return -1;
    }

    if (virFileNBDDeviceAssociate(fs->src, fs->format, fs->readonly,
                                  &nbdDev) < 0)
        return -1;

    VIR_DEBUG("Changing fs %s to use type=block for dev %s",
              fs->src, nbdDev);

    fs->type = VIR_DOMAIN_FS_TYPE_BLOCK;
    VIR_FREE(fs->src);
    fs->src = nbdDev;       /* ownership of the name moves to @fs */

    return 0;
}


/*
 * Associates the image behind @disk with an NBD device and rewrites
 * @disk to be a block-type disk whose source is that device, so the rest
 * of container setup 'just works'.  An explicit image format is required.
 *
 * Returns 0 on success, -1 on failure.
 */
static int virLXCControllerSetupNBDDeviceDisk(virDomainDiskDefPtr disk)
{
    const char *path = virDomainDiskGetSource(disk);
    int fmt = virDomainDiskGetFormat(disk);
    char *nbdDev;
    int rc;

    if (fmt <= VIR_STORAGE_FILE_NONE) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("An explicit disk format must be specified"));
        return -1;
    }

    if (virFileNBDDeviceAssociate(path, fmt, disk->src->readonly,
                                  &nbdDev) < 0)
        return -1;

    VIR_DEBUG("Changing disk %s to use type=block for dev %s",
              path, nbdDev);

    virDomainDiskSetType(disk, VIR_STORAGE_TYPE_BLOCK);
    rc = virDomainDiskSetSource(disk, nbdDev) < 0 ? -1 : 0;

    /* The local copy of the name is released on both outcomes */
    VIR_FREE(nbdDev);
    return rc;
}


529
/*
 * Appends @fd to ctrl->loopDevFds.  On allocation failure the descriptor
 * is closed and -1 is returned; otherwise returns 0.
 */
static int virLXCControllerRememberLoopFd(virLXCControllerPtr ctrl, int fd)
{
    if (VIR_EXPAND_N(ctrl->loopDevFds, ctrl->nloopDevs, 1) < 0) {
        VIR_FORCE_CLOSE(fd);
        return -1;
    }
    ctrl->loopDevFds[ctrl->nloopDevs - 1] = fd;
    return 0;
}


/*
 * Converts every file-backed filesystem and disk of the domain into a
 * block device, using either a loop device or an NBD device.
 *
 * Filesystems: a default fsdriver becomes 'loop' for raw/unspecified
 * formats and 'nbd' otherwise.  Disks: with no driver name, 'loop' is
 * used for raw/unspecified formats and 'nbd' otherwise.  Loop device
 * descriptors are recorded in ctrl->loopDevFds.
 *
 * Returns 0 on success, -1 on the first failure.
 */
static int virLXCControllerSetupLoopDevices(virLXCControllerPtr ctrl)
{
    size_t i;

    VIR_DEBUG("Setting up loop devices for filesystems");

    for (i = 0; i < ctrl->def->nfss; i++) {
        virDomainFSDefPtr fs = ctrl->def->fss[i];
        bool rawOrNone;
        int fd;

        if (fs->type != VIR_DOMAIN_FS_TYPE_FILE)
            continue;

        rawOrNone = (fs->format == VIR_STORAGE_FILE_RAW ||
                     fs->format == VIR_STORAGE_FILE_NONE);

        if (fs->fsdriver == VIR_DOMAIN_FS_DRIVER_TYPE_DEFAULT) {
            fs->fsdriver = rawOrNone ? VIR_DOMAIN_FS_DRIVER_TYPE_LOOP
                                     : VIR_DOMAIN_FS_DRIVER_TYPE_NBD;
        }

        if (fs->fsdriver == VIR_DOMAIN_FS_DRIVER_TYPE_NBD) {
            if (virLXCControllerSetupNBDDeviceFS(fs) < 0)
                return -1;
            continue;
        }

        if (fs->fsdriver != VIR_DOMAIN_FS_DRIVER_TYPE_LOOP) {
            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                           _("fs driver %s is not supported"),
                           virDomainFSDriverTypeToString(fs->fsdriver));
            return -1;
        }

        /* Loop driver from here on */
        if (!rawOrNone) {
            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                           _("fs format %s is not supported"),
                           virStorageFileFormatTypeToString(fs->format));
            return -1;
        }

        if ((fd = virLXCControllerSetupLoopDeviceFS(fs)) < 0)
            return -1;

        VIR_DEBUG("Saving loop fd %d", fd);
        if (virLXCControllerRememberLoopFd(ctrl, fd) < 0)
            return -1;
    }

    VIR_DEBUG("Setting up loop devices for disks");

    for (i = 0; i < ctrl->def->ndisks; i++) {
        virDomainDiskDefPtr disk = ctrl->def->disks[i];
        const char *driver = virDomainDiskGetDriver(disk);
        int format = virDomainDiskGetFormat(disk);
        bool rawOrNone;
        bool wantLoop;
        int fd;

        if (virDomainDiskGetType(disk) != VIR_STORAGE_TYPE_FILE)
            continue;

        rawOrNone = (format == VIR_STORAGE_FILE_RAW ||
                     format == VIR_STORAGE_FILE_NONE);

        /* If no driverName is set, we prefer 'loop' for
         * dealing with raw or undefined formats, otherwise
         * we use 'nbd'.
         */
        wantLoop = driver ? STREQ(driver, "loop") : rawOrNone;

        if (wantLoop) {
            if (!rawOrNone) {
                virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                               _("disk format %s is not supported"),
                               virStorageFileFormatTypeToString(format));
                return -1;
            }

            /* We treat 'none' as meaning 'raw' since we
             * don't want to go into the auto-probing
             * business for security reasons
             */
            if ((fd = virLXCControllerSetupLoopDeviceDisk(disk)) < 0)
                return -1;

            VIR_DEBUG("Saving loop fd %d", fd);
            if (virLXCControllerRememberLoopFd(ctrl, fd) < 0)
                return -1;
            continue;
        }

        if (driver && !STREQ(driver, "nbd")) {
            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                           _("disk driver %s is not supported"),
                           driver);
            return -1;
        }

        /* NBD driver from here on */
        if (disk->cachemode != VIR_DOMAIN_DISK_CACHE_DEFAULT &&
            disk->cachemode != VIR_DOMAIN_DISK_CACHE_DISABLE) {
            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                           _("Disk cache mode %s is not supported"),
                           virDomainDiskCacheTypeToString(disk->cachemode));
            return -1;
        }
        if (virLXCControllerSetupNBDDeviceDisk(disk) < 0)
            return -1;
    }

    VIR_DEBUG("Setup all loop devices");
    return 0;
}

647 648 649 650

/*
 * To be run while still single threaded
 */
651
static int virLXCControllerSetupCpuAffinity(virLXCControllerPtr ctrl)
652
{
H
Hu Tao 已提交
653 654
    int hostcpus, maxcpu = CPU_SETSIZE;
    virBitmapPtr cpumap, cpumapToSet;
655 656 657 658 659

    VIR_DEBUG("Setting CPU affinity");

    /* setaffinity fails if you set bits for CPUs which
     * aren't present, so we have to limit ourselves */
660 661 662
    if ((hostcpus = nodeGetCPUCount()) < 0)
        return -1;

663 664 665
    if (maxcpu > hostcpus)
        maxcpu = hostcpus;

666 667
    cpumap = virBitmapNew(maxcpu);
    if (!cpumap)
668 669
        return -1;

H
Hu Tao 已提交
670 671
    cpumapToSet = cpumap;

672
    if (ctrl->def->cpumask) {
H
Hu Tao 已提交
673
        cpumapToSet = ctrl->def->cpumask;
674 675 676 677 678
    } else {
        /* You may think this is redundant, but we can't assume libvirtd
         * itself is running on all pCPUs, so we need to explicitly set
         * the spawned LXC instance to all pCPUs if no map is given in
         * its config file */
679
        virBitmapSetAll(cpumap);
680 681
    }

682
    /* We are presuming we are running between fork/exec of LXC
683 684 685
     * so use '0' to indicate our own process ID. No threads are
     * running at this point
     */
686
    if (virProcessSetAffinity(0 /* Self */, cpumapToSet) < 0) {
687
        virBitmapFree(cpumap);
688 689
        return -1;
    }
690
    virBitmapFree(cpumap);
691 692 693 694 695

    return 0;
}


696 697 698 699
/*
 * Obtains the advisory nodeset from numad if 'placement' of either
 * <vcpu> or <numatune> is 'auto'.
 *
 * On success *@mask receives the parsed nodeset, or NULL when no advice
 * is needed.  On failure *@mask is left untouched.
 *
 * Returns 0 on success, -1 on failure.
 */
static int virLXCControllerGetNumadAdvice(virLXCControllerPtr ctrl,
                                          virBitmapPtr *mask)
{
    virBitmapPtr parsed = NULL;
    char *advice;

    if (!virDomainDefNeedsPlacementAdvice(ctrl->def)) {
        *mask = NULL;
        return 0;
    }

    advice = virNumaGetAutoPlacementAdvice(ctrl->def->vcpus,
                                           ctrl->def->mem.cur_balloon);
    if (!advice)
        return -1;

    VIR_DEBUG("Nodeset returned from numad: %s", advice);

    if (virBitmapParse(advice, 0, &parsed, VIR_DOMAIN_CPUMASK_LEN) < 0) {
        VIR_FREE(advice);
        return -1;
    }

    *mask = parsed;
    VIR_FREE(advice);
    return 0;
}


727
/**
728 729
 * virLXCControllerSetupResourceLimits
 * @ctrl: the controller state
730
 *
731 732 733
 * Sets up the non-cgroup based resource limits that need
 * to be inherited by the child process across clone()/exec().
 * The cgroup limits are setup later
734 735 736
 *
 * Returns 0 on success or -1 in case of error
 */
737
static int virLXCControllerSetupResourceLimits(virLXCControllerPtr ctrl)
738
{
739
    virBitmapPtr auto_nodeset = NULL;
740
    int ret = -1;
741 742 743
    virBitmapPtr nodeset = NULL;
    virDomainNumatuneMemMode mode;

744
    mode = virDomainNumatuneGetMode(ctrl->def->numa, -1);
745

746 747 748 749
    if (mode == VIR_DOMAIN_NUMATUNE_MEM_STRICT &&
        virCgroupControllerAvailable(VIR_CGROUP_CONTROLLER_CPUSET)) {
        /* Use virNuma* API iff necessary. Once set and child is exec()-ed,
         * there's no way for us to change it. Rely on cgroups (if available
750
         * and enabled in the config) rather than virNuma*. */
751 752
        VIR_DEBUG("Relying on CGroups for memory binding");
    } else {
753

754
        VIR_DEBUG("Setting up process resource limits");
755

756 757 758 759 760 761 762 763
        if (virLXCControllerGetNumadAdvice(ctrl, &auto_nodeset) < 0)
            goto cleanup;

        nodeset = virDomainNumatuneGetNodeset(ctrl->def->numa, auto_nodeset, -1);

        if (virNumaSetupMemoryPolicy(mode, nodeset) < 0)
            goto cleanup;
    }
764

765
    if (virLXCControllerSetupCpuAffinity(ctrl) < 0)
766
        goto cleanup;
767

768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789
    ret = 0;
 cleanup:
    virBitmapFree(auto_nodeset);
    return ret;
}


/*
 * Creates the container's cgroup, adds the calling process to it and
 * applies the limits configured in the domain definition.
 *
 * Returns 0 on success, -1 on failure.
 */
static int virLXCControllerSetupCgroupLimits(virLXCControllerPtr ctrl)
{
    virBitmapPtr numadNodes = NULL;
    virBitmapPtr memNodes = NULL;
    int rc = -1;

    VIR_DEBUG("Setting up cgroup resource limits");

    if (virLXCControllerGetNumadAdvice(ctrl, &numadNodes) < 0)
        goto done;

    memNodes = virDomainNumatuneGetNodeset(ctrl->def->numa, numadNodes, -1);

    ctrl->cgroup = virLXCCgroupCreate(ctrl->def,
                                      ctrl->initpid,
                                      ctrl->nnicindexes,
                                      ctrl->nicindexes);
    if (!ctrl->cgroup)
        goto done;

    if (virCgroupAddTask(ctrl->cgroup, getpid()) < 0)
        goto done;

    if (virLXCCgroupSetup(ctrl->def, ctrl->cgroup, memNodes) < 0)
        goto done;

    rc = 0;

 done:
    virBitmapFree(numadNodes);
    return rc;
}

810

811 812 813 814 815 816 817 818 819 820 821 822 823
/*
 * Close hook installed on each client.  Forgets the client if it is the
 * one the controller currently tracks, and arms the quit timer when the
 * controller is already shutting down.
 */
static void virLXCControllerClientCloseHook(virNetServerClientPtr client)
{
    virLXCControllerPtr owner = virNetServerClientGetPrivateData(client);

    VIR_DEBUG("Client %p has closed", client);

    if (owner->client == client)
        owner->client = NULL;

    if (!owner->inShutdown)
        return;

    VIR_DEBUG("Arm timer to quit event loop");
    virEventUpdateTimeout(owner->timerShutdown, 0);
}

824 825
static void virLXCControllerClientPrivateFree(void *data)
{
826 827
    virLXCControllerPtr ctrl = data;
    VIR_DEBUG("Got private data free %p", ctrl);
828 829 830 831
}

/*
 * Private-data constructor for new clients.  Installs the close hook,
 * records @client as the controller's current client and, for the first
 * client only, sends the init event if an init PID is already known.
 *
 * Returns the controller (@opaque) as the client's private data.
 */
static void *virLXCControllerClientPrivateNew(virNetServerClientPtr client,
                                              void *opaque)
{
    virLXCControllerPtr owner = opaque;

    virNetServerClientSetCloseHook(client, virLXCControllerClientCloseHook);
    VIR_DEBUG("Got new client %p", client);
    owner->client = client;

    if (owner->initpid) {
        if (owner->firstClient)
            virLXCControllerEventSendInit(owner, owner->initpid);
    }
    owner->firstClient = false;

    return owner;
}

846 847

/*
 * Creates the controller's RPC server, binds a UNIX socket service at
 * LXC_STATE_DIR/<name>.sock (created between setting and clearing the
 * security manager's socket label) and creates the monitor program.
 *
 * Returns 0 on success, -1 on failure; on failure ctrl->server is
 * released and reset to NULL.
 */
static int virLXCControllerSetupServer(virLXCControllerPtr ctrl)
{
    virNetServerServicePtr service = NULL;
    char *path;
    int rc = -1;

    if (virAsprintf(&path, "%s/%s.sock",
                    LXC_STATE_DIR, ctrl->name) < 0)
        return -1;

    ctrl->server = virNetServerNew(0, 0, 0, 1,
                                   0, -1, 0, false,
                                   NULL,
                                   virLXCControllerClientPrivateNew,
                                   NULL,
                                   virLXCControllerClientPrivateFree,
                                   ctrl);
    if (!ctrl->server)
        goto done;

    if (virSecurityManagerSetSocketLabel(ctrl->securityManager, ctrl->def) < 0)
        goto done;

    service = virNetServerServiceNewUNIX(path,
                                         0700,
                                         0,
                                         0,
#if WITH_GNUTLS
                                         NULL,
#endif
                                         false,
                                         0,
                                         5);
    if (!service)
        goto done;

    if (virSecurityManagerClearSocketLabel(ctrl->securityManager, ctrl->def) < 0)
        goto done;

    if (virNetServerAddService(ctrl->server, service, NULL) < 0)
        goto done;

    /* Drop the local reference now that the service has been added */
    virObjectUnref(service);
    service = NULL;

    ctrl->prog = virNetServerProgramNew(VIR_LXC_MONITOR_PROGRAM,
                                        VIR_LXC_MONITOR_PROGRAM_VERSION,
                                        virLXCMonitorProcs,
                                        virLXCMonitorNProcs);
    if (!ctrl->prog)
        goto done;

    virNetServerUpdateServices(ctrl->server, true);
    rc = 0;

 done:
    VIR_FREE(path);
    if (rc < 0) {
        virObjectUnref(ctrl->server);
        ctrl->server = NULL;
        virObjectUnref(service);
    }
    return rc;
}
905

D
Daniel P. Berrange 已提交
906 907 908

/*
 * Clears the process capabilities through libcap-ng when support is
 * compiled in; otherwise only logs a warning.
 *
 * Returns 0 on success (or when libcap-ng is unavailable), -1 if the
 * cleared capability set could not be applied.
 */
static int lxcControllerClearCapabilities(void)
{
#if WITH_CAPNG
    int rc;

    capng_clear(CAPNG_SELECT_BOTH);

    rc = capng_apply(CAPNG_SELECT_BOTH);
    if (rc < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("failed to apply capabilities: %d"), rc);
        return -1;
    }
#else
    VIR_WARN("libcap-ng support not compiled in, unable to clear capabilities");
#endif
    return 0;
}

925
static bool wantReboot;
926
static virMutex lock = VIR_MUTEX_INITIALIZER;
927 928


929
static void virLXCControllerSignalChildIO(virNetServerPtr server,
930
                                          siginfo_t *info ATTRIBUTE_UNUSED,
931
                                          void *opaque)
932
{
933
    virLXCControllerPtr ctrl = opaque;
934
    int ret;
935
    int status;
936

937
    ret = waitpid(-1, &status, WNOHANG);
938
    VIR_DEBUG("Got sig child %d vs %lld", ret, (unsigned long long)ctrl->initpid);
939
    if (ret == ctrl->initpid) {
940
        virNetServerQuit(server);
941
        virMutexLock(&lock);
942
        if (WIFSIGNALED(status) &&
943 944
            WTERMSIG(status) == SIGHUP) {
            VIR_DEBUG("Status indicates reboot");
945
            wantReboot = true;
946
        }
947 948
        virMutexUnlock(&lock);
    }
949 950 951
}


952
/**
 * virLXCControllerConsoleUpdateWatch:
 * @console: console whose event registrations should be refreshed
 *
 * Recomputes the events of interest for the host and container sides of
 * @console from the hostClosed/contClosed flags and the fill level of
 * the two transfer buffers, then hands them to virEventUpdateHandle().
 *
 * A side that is flagged closed gets no event-loop events. Instead its
 * fd is registered edge-triggered on console->epollFd (EPOLLIN, plus
 * EPOLLOUT when there is data queued for it). When a side is no longer
 * flagged closed but still has an epoll registration, that registration
 * is removed.
 *
 * If an epoll_ctl() call fails, the error is reported, the server is
 * asked to quit and the remaining updates are skipped.
 */
static void virLXCControllerConsoleUpdateWatch(virLXCControllerConsolePtr console)
{
    int hostEvents = 0;
    int contEvents = 0;

    /* If host console is open, then we can look to read/write */
    if (!console->hostClosed) {
        if (console->fromHostLen < sizeof(console->fromHostBuf))
            hostEvents |= VIR_EVENT_HANDLE_READABLE;
        if (console->fromContLen)
            hostEvents |= VIR_EVENT_HANDLE_WRITABLE;
    }

    /* If cont console is open, then we can look to read/write */
    if (!console->contClosed) {
        if (console->fromContLen < sizeof(console->fromContBuf))
            contEvents |= VIR_EVENT_HANDLE_READABLE;
        if (console->fromHostLen)
            contEvents |= VIR_EVENT_HANDLE_WRITABLE;
    }

    VIR_DEBUG("Container watch=%d, events=%d closed=%d; host watch=%d events=%d closed=%d",
              console->contWatch, contEvents, console->contClosed,
              console->hostWatch, hostEvents, console->hostClosed);
    virEventUpdateHandle(console->contWatch, contEvents);
    virEventUpdateHandle(console->hostWatch, hostEvents);

    if (console->hostClosed) {
        /* Must setup an epoll to detect when host becomes accessible again */
        int events = EPOLLIN | EPOLLET;
        if (console->fromContLen)
            events |= EPOLLOUT;

        if (events != console->hostEpoll) {
            struct epoll_event event;
            int action = EPOLL_CTL_ADD;
            if (console->hostEpoll)
                action = EPOLL_CTL_MOD;

            VIR_DEBUG("newHostEvents=%x oldHostEvents=%x", events, console->hostEpoll);

            event.events = events;
            event.data.fd = console->hostFd;
            if (epoll_ctl(console->epollFd, action, console->hostFd, &event) < 0) {
                /* Report first so errno is consumed before any other call */
                virReportSystemError(errno, "%s",
                                     _("Unable to add epoll fd"));
                VIR_DEBUG(":fail");
                virNetServerQuit(console->server);
                goto cleanup;
            }
            console->hostEpoll = events;
            VIR_DEBUG("newHostEvents=%x oldHostEvents=%x", events, console->hostEpoll);
        }
    } else if (console->hostEpoll) {
        VIR_DEBUG("Stop epoll oldHostEvents=%x", console->hostEpoll);
        if (epoll_ctl(console->epollFd, EPOLL_CTL_DEL, console->hostFd, NULL) < 0) {
            virReportSystemError(errno, "%s",
                                 _("Unable to remove epoll fd"));
            VIR_DEBUG(":fail");
            virNetServerQuit(console->server);
            goto cleanup;
        }
        console->hostEpoll = 0;
    }

    if (console->contClosed) {
        /* Must setup an epoll to detect when guest becomes accessible again */
        int events = EPOLLIN | EPOLLET;
        if (console->fromHostLen)
            events |= EPOLLOUT;

        if (events != console->contEpoll) {
            struct epoll_event event;
            int action = EPOLL_CTL_ADD;
            if (console->contEpoll)
                action = EPOLL_CTL_MOD;

            VIR_DEBUG("newContEvents=%x oldContEvents=%x", events, console->contEpoll);

            event.events = events;
            event.data.fd = console->contFd;
            if (epoll_ctl(console->epollFd, action, console->contFd, &event) < 0) {
                virReportSystemError(errno, "%s",
                                     _("Unable to add epoll fd"));
                VIR_DEBUG(":fail");
                virNetServerQuit(console->server);
                goto cleanup;
            }
            console->contEpoll = events;
            VIR_DEBUG("newContEvents=%x oldContEvents=%x", events, console->contEpoll);
        }
    } else if (console->contEpoll) {
        VIR_DEBUG("Stop epoll oldContEvents=%x", console->contEpoll);
        if (epoll_ctl(console->epollFd, EPOLL_CTL_DEL, console->contFd, NULL) < 0) {
            virReportSystemError(errno, "%s",
                                 _("Unable to remove epoll fd"));
            VIR_DEBUG(":fail");
            virNetServerQuit(console->server);
            goto cleanup;
        }
        console->contEpoll = 0;
    }
 cleanup:
    return;
}
1057 1058


1059
static void virLXCControllerConsoleEPoll(int watch, int fd, int events, void *opaque)
1060
{
1061
    virLXCControllerConsolePtr console = opaque;
1062

1063 1064 1065 1066 1067 1068 1069 1070 1071 1072
    virMutexLock(&lock);
    VIR_DEBUG("IO event watch=%d fd=%d events=%d fromHost=%zu fromcont=%zu",
              watch, fd, events,
              console->fromHostLen,
              console->fromContLen);

    while (1) {
        struct epoll_event event;
        int ret;
        ret = epoll_wait(console->epollFd, &event, 1, 0);
1073
        if (ret < 0) {
S
Stefan Berger 已提交
1074
            if (errno == EINTR)
1075 1076 1077
                continue;
            virReportSystemError(errno, "%s",
                                 _("Unable to wait on epoll"));
1078
            virNetServerQuit(console->server);
1079 1080 1081
            goto cleanup;
        }

1082 1083 1084 1085 1086 1087 1088
        if (ret == 0)
            break;

        VIR_DEBUG("fd=%d hostFd=%d contFd=%d hostEpoll=%x contEpoll=%x",
                  event.data.fd, console->hostFd, console->contFd,
                  console->hostEpoll, console->contEpoll);

1089 1090
        /* If we get HUP+dead PID, we just re-enable the main loop
         * which will see the PID has died and exit */
1091
        if ((event.events & (EPOLLIN|EPOLLOUT))) {
1092 1093
            if (event.data.fd == console->hostFd) {
                console->hostClosed = false;
1094
            } else {
1095
                console->contClosed = false;
1096
            }
1097
            virLXCControllerConsoleUpdateWatch(console);
1098 1099 1100 1101
            break;
        }
    }

1102
 cleanup:
1103
    virMutexUnlock(&lock);
1104 1105
}

1106
static void virLXCControllerConsoleIO(int watch, int fd, int events, void *opaque)
1107
{
1108
    virLXCControllerConsolePtr console = opaque;
1109 1110

    virMutexLock(&lock);
1111 1112 1113 1114
    VIR_DEBUG("IO event watch=%d fd=%d events=%d fromHost=%zu fromcont=%zu",
              watch, fd, events,
              console->fromHostLen,
              console->fromContLen);
1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182
    if (events & VIR_EVENT_HANDLE_READABLE) {
        char *buf;
        size_t *len;
        size_t avail;
        ssize_t done;
        if (watch == console->hostWatch) {
            buf = console->fromHostBuf;
            len = &console->fromHostLen;
            avail = sizeof(console->fromHostBuf) - *len;
        } else {
            buf = console->fromContBuf;
            len = &console->fromContLen;
            avail = sizeof(console->fromContBuf) - *len;
        }
    reread:
        done = read(fd, buf + *len, avail);
        if (done == -1 && errno == EINTR)
            goto reread;
        if (done == -1 && errno != EAGAIN) {
            virReportSystemError(errno, "%s",
                                 _("Unable to read container pty"));
            goto error;
        }
        if (done > 0) {
            *len += done;
        } else {
            VIR_DEBUG("Read fd %d done %d errno %d", fd, (int)done, errno);
        }
    }

    if (events & VIR_EVENT_HANDLE_WRITABLE) {
        char *buf;
        size_t *len;
        ssize_t done;
        if (watch == console->hostWatch) {
            buf = console->fromContBuf;
            len = &console->fromContLen;
        } else {
            buf = console->fromHostBuf;
            len = &console->fromHostLen;
        }

    rewrite:
        done = write(fd, buf, *len);
        if (done == -1 && errno == EINTR)
            goto rewrite;
        if (done == -1 && errno != EAGAIN) {
            virReportSystemError(errno, "%s",
                                 _("Unable to write to container pty"));
            goto error;
        }
        if (done > 0) {
            memmove(buf, buf + done, (*len - done));
            *len -= done;
        } else {
            VIR_DEBUG("Write fd %d done %d errno %d", fd, (int)done, errno);
        }
    }

    if (events & VIR_EVENT_HANDLE_HANGUP) {
        if (watch == console->hostWatch) {
            console->hostClosed = true;
        } else {
            console->contClosed = true;
        }
        VIR_DEBUG("Got EOF on %d %d", watch, fd);
    }

1183
    virLXCControllerConsoleUpdateWatch(console);
1184 1185 1186
    virMutexUnlock(&lock);
    return;

1187
 error:
1188 1189 1190
    virEventRemoveHandle(console->contWatch);
    virEventRemoveHandle(console->hostWatch);
    console->contWatch = console->hostWatch = -1;
1191
    virNetServerQuit(console->server);
1192 1193 1194 1195
    virMutexUnlock(&lock);
}


1196
/**
1197
 * lxcControllerMain
1198 1199
 * @serverFd: server socket fd to accept client requests
 * @clientFd: initial client which is the libvirtd daemon
1200
 *
1201
 * Processes I/O on consoles and the monitor
1202 1203 1204
 *
 * Returns 0 on success or -1 in case of error
 */
1205
static int virLXCControllerMain(virLXCControllerPtr ctrl)
1206
{
1207
    virErrorPtr err;
1208
    int rc = -1;
1209
    size_t i;
1210

1211 1212 1213 1214
    if (virNetServerAddSignalHandler(ctrl->server,
                                     SIGCHLD,
                                     virLXCControllerSignalChildIO,
                                     ctrl) < 0)
1215 1216
        goto cleanup;

1217 1218
    virResetLastError();

1219
    for (i = 0; i < ctrl->nconsoles; i++) {
1220
        if ((ctrl->consoles[i].epollFd = epoll_create1(EPOLL_CLOEXEC)) < 0) {
1221 1222 1223 1224 1225
            virReportSystemError(errno, "%s",
                                 _("Unable to create epoll fd"));
            goto cleanup;
        }

1226 1227 1228 1229 1230
        if ((ctrl->consoles[i].epollWatch = virEventAddHandle(ctrl->consoles[i].epollFd,
                                                              VIR_EVENT_HANDLE_READABLE,
                                                              virLXCControllerConsoleEPoll,
                                                              &(ctrl->consoles[i]),
                                                              NULL)) < 0) {
1231 1232
            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                           _("Unable to watch epoll FD"));
1233 1234 1235
            goto cleanup;
        }

1236 1237 1238 1239 1240
        if ((ctrl->consoles[i].hostWatch = virEventAddHandle(ctrl->consoles[i].hostFd,
                                                             VIR_EVENT_HANDLE_READABLE,
                                                             virLXCControllerConsoleIO,
                                                             &(ctrl->consoles[i]),
                                                             NULL)) < 0) {
1241 1242
            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                           _("Unable to watch host console PTY"));
1243 1244 1245
            goto cleanup;
        }

1246 1247 1248 1249 1250
        if ((ctrl->consoles[i].contWatch = virEventAddHandle(ctrl->consoles[i].contFd,
                                                             VIR_EVENT_HANDLE_READABLE,
                                                             virLXCControllerConsoleIO,
                                                             &(ctrl->consoles[i]),
                                                             NULL)) < 0) {
1251 1252
            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                           _("Unable to watch host console PTY"));
1253 1254
            goto cleanup;
        }
1255
    }
1256

1257
    virNetServerRun(ctrl->server);
1258

1259 1260
    err = virGetLastError();
    if (!err || err->code == VIR_ERR_OK)
1261
        rc = wantReboot ? 1 : 0;
1262

1263
 cleanup:
1264
    for (i = 0; i < ctrl->nconsoles; i++)
1265
        virLXCControllerConsoleClose(&(ctrl->consoles[i]));
1266

1267 1268 1269
    return rc;
}

1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283
/**
 * virLXCControllerLookupUsernsMap:
 * @map: array of ID mapping entries
 * @num: number of entries in @map
 * @src: ID to translate
 *
 * Translates @src through the first entry of @map whose range
 * [start, start + count) contains it, preserving the offset of @src
 * within that range.
 *
 * Returns the translated ID, or @src unchanged if no entry covers it.
 */
static unsigned int
virLXCControllerLookupUsernsMap(virDomainIdMapEntryPtr map,
                                int num,
                                unsigned int src)
{
    size_t i;

    for (i = 0; i < num; i++) {
        /* The first ID of a range belongs to it, so the lower bound is
         * inclusive. Comparing the offset against count (rather than
         * src against start + count) cannot wrap around. */
        if (src >= map[i].start &&
            src - map[i].start < map[i].count)
            return map[i].target + (src - map[i].start);
    }

    return src;
}
1284

1285 1286 1287 1288 1289 1290
/**
 * virLXCControllerSetupUsernsMap:
 * @map: array of ID mapping entries
 * @num: number of entries in @map
 * @path: file the mapping is written to
 *
 * Formats every entry of @map as a "start target count" line and
 * writes the resulting text to @path in a single virFileWriteStr() call.
 *
 * Returns 0 on success, -1 if building the text or writing it failed.
 */
static int
virLXCControllerSetupUsernsMap(virDomainIdMapEntryPtr map,
                               int num,
                               char *path)
{
    virBuffer lines = VIR_BUFFER_INITIALIZER;
    int ret = -1;
    size_t idx = 0;

    while (idx < num) {
        virBufferAsprintf(&lines, "%u %u %u\n",
                          map[idx].start, map[idx].target, map[idx].count);
        idx++;
    }

    if (virBufferCheckError(&lines) < 0)
        goto cleanup;

    VIR_DEBUG("Set '%s' to '%s'", path, virBufferCurrentContent(&lines));

    if (virFileWriteStr(path, virBufferCurrentContent(&lines), 0) < 0)
        virReportSystemError(errno, _("unable write to %s"), path);
    else
        ret = 0;

 cleanup:
    virBufferFreeAndReset(&lines);
    return ret;
}

/**
 * virLXCControllerSetupUserns:
 * @ctrl: the LXC controller
 *
 * Writes the domain's uid and gid mappings to /proc/<initpid>/uid_map
 * and /proc/<initpid>/gid_map, in that order. Nothing is done when the
 * domain definition carries no uid mapping.
 *
 * Returns 0 on success or -1 in case of error
 */
static int virLXCControllerSetupUserns(virLXCControllerPtr ctrl)
{
    char *uid_path = NULL;
    char *gid_path = NULL;
    int ret = -1;

    /* User namespace is disabled for container */
    if (ctrl->def->idmap.nuidmap == 0) {
        VIR_DEBUG("No uid map, skipping userns setup");
        return 0;
    }

    VIR_DEBUG("Setting up userns maps");

    /* uid map first; short-circuiting keeps the original call order */
    if (virAsprintf(&uid_path, "/proc/%d/uid_map", ctrl->initpid) < 0 ||
        virLXCControllerSetupUsernsMap(ctrl->def->idmap.uidmap,
                                       ctrl->def->idmap.nuidmap,
                                       uid_path) < 0)
        goto cleanup;

    if (virAsprintf(&gid_path, "/proc/%d/gid_map", ctrl->initpid) < 0 ||
        virLXCControllerSetupUsernsMap(ctrl->def->idmap.gidmap,
                                       ctrl->def->idmap.ngidmap,
                                       gid_path) < 0)
        goto cleanup;

    ret = 0;

 cleanup:
    VIR_FREE(gid_path);
    VIR_FREE(uid_path);
    return ret;
}

1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369
/**
 * virLXCControllerSetupDev:
 * @ctrl: the LXC controller
 *
 * Creates the directory /<LXC_STATE_DIR>/<domain name>.dev, mounts a
 * size-limited tmpfs on it (with the security manager's mount options
 * appended) and passes it to lxcContainerChown().
 *
 * Returns 0 on success, -1 on error.
 */
static int virLXCControllerSetupDev(virLXCControllerPtr ctrl)
{
    char *seclabel_opts = NULL;
    char *tmpfs_opts = NULL;
    char *devdir = NULL;
    int ret = -1;

    VIR_DEBUG("Setting up /dev/ for container");

    /* NOTE(review): the result is not checked for NULL before being
     * formatted with %s below - confirm it cannot be NULL here */
    seclabel_opts = virSecurityManagerGetMountOptions(ctrl->securityManager,
                                                      ctrl->def);

    if (virAsprintf(&devdir, "/%s/%s.dev",
                    LXC_STATE_DIR, ctrl->def->name) < 0)
        goto cleanup;

    if (virFileMakePath(devdir) < 0) {
        virReportSystemError(errno,
                             _("Failed to make path %s"), devdir);
        goto cleanup;
    }

    /*
     * tmpfs is limited to 64kb, since we only have device nodes in there
     * and don't want to DOS the entire OS RAM usage
     */
    if (virAsprintf(&tmpfs_opts,
                    "mode=755,size=65536%s", seclabel_opts) < 0)
        goto cleanup;

    VIR_DEBUG("Mount devfs on %s type=tmpfs flags=%x, opts=%s",
              devdir, MS_NOSUID, tmpfs_opts);
    if (mount("devfs", devdir, "tmpfs", MS_NOSUID, tmpfs_opts) < 0) {
        virReportSystemError(errno,
                             _("Failed to mount devfs on %s type %s (%s)"),
                             devdir, "tmpfs", tmpfs_opts);
        goto cleanup;
    }

    if (lxcContainerChown(ctrl->def, devdir) >= 0)
        ret = 0;

 cleanup:
    VIR_FREE(devdir);
    VIR_FREE(seclabel_opts);
    VIR_FREE(tmpfs_opts);
    return ret;
}

/**
 * virLXCControllerPopulateDevices:
 * @ctrl: the LXC controller
 *
 * Sets up the container's /dev directory via virLXCControllerSetupDev()
 * and then creates a fixed set of character device nodes in it (null,
 * zero, full, random, urandom, tty). Each node is created, given its
 * permission bits with chmod() and passed to lxcContainerChown().
 *
 * Returns 0 on success, -1 on error.
 */
static int virLXCControllerPopulateDevices(virLXCControllerPtr ctrl)
{
    static const struct {
        int devmajor;
        int devminor;
        mode_t perms;
        const char *node;
    } nodes[] = {
        { LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_NULL, 0666, "/null" },
        { LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_ZERO, 0666, "/zero" },
        { LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_FULL, 0666, "/full" },
        { LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_RANDOM, 0666, "/random" },
        { LXC_DEV_MAJ_MEMORY, LXC_DEV_MIN_URANDOM, 0666, "/urandom" },
        { LXC_DEV_MAJ_TTY, LXC_DEV_MIN_TTY, 0666, "/tty" },
    };
    char *path = NULL;
    int ret = -1;
    size_t n;

    if (virLXCControllerSetupDev(ctrl) < 0)
        return -1;

    /* Populate /dev/ with a few important bits */
    for (n = 0; n < ARRAY_CARDINALITY(nodes); n++) {
        dev_t devnum = makedev(nodes[n].devmajor, nodes[n].devminor);

        if (virAsprintf(&path, "/%s/%s.dev/%s",
                        LXC_STATE_DIR, ctrl->def->name, nodes[n].node) < 0)
            goto cleanup;

        /* The node is created without permission bits; chmod() then
         * applies the mode from the table */
        if (mknod(path, S_IFCHR, devnum) < 0 ||
            chmod(path, nodes[n].perms)) {
            virReportSystemError(errno,
                                 _("Failed to make device %s"),
                                 path);
            goto cleanup;
        }

        if (lxcContainerChown(ctrl->def, path) < 0)
            goto cleanup;

        VIR_FREE(path);
    }

    ret = 0;

 cleanup:
    VIR_FREE(path);
    return ret;
}
1456

1457

1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469
/**
 * virLXCControllerSetupHostdevSubsysUSB:
 * @vmDef: domain definition
 * @def: USB host device to expose
 * @securityDriver: security manager used to label the device
 *
 * Looks up the host node USB_DEVFS/<bus>/<device>, verifies it is a
 * character device and creates a node with the same device number
 * under /<LXC_STATE_DIR>/<domain name>.dev/bus/usb/<bus>/<device>.
 * The new node is passed to lxcContainerChown() and the hostdev is
 * labelled with the container's USB root as virtual root.
 *
 * Returns 0 on success, -1 on error.
 */
static int
virLXCControllerSetupHostdevSubsysUSB(virDomainDefPtr vmDef,
                                      virDomainHostdevDefPtr def,
                                      virSecurityManagerPtr securityDriver)
{
    virDomainHostdevSubsysUSBPtr usbsrc = &def->source.subsys.u.usb;
    const mode_t nodemode = 0700 | S_IFCHR;
    char *hostnode = NULL;
    char *usbroot = NULL;
    char *busdir = NULL;
    char *devnode = NULL;
    struct stat sb;
    int ret = -1;

    /* Build all four paths up front; each one depends on the previous */
    if (virAsprintf(&hostnode, USB_DEVFS "/%03d/%03d",
                    usbsrc->bus, usbsrc->device) < 0 ||
        virAsprintf(&usbroot, "/%s/%s.dev/bus/usb/",
                    LXC_STATE_DIR, vmDef->name) < 0 ||
        virAsprintf(&busdir, "%s/%03d/", usbroot, usbsrc->bus) < 0 ||
        virAsprintf(&devnode, "%s/%03d", busdir, usbsrc->device) < 0)
        goto cleanup;

    if (stat(hostnode, &sb) < 0) {
        virReportSystemError(errno,
                             _("Unable to access %s"), hostnode);
        goto cleanup;
    }

    if (!S_ISCHR(sb.st_mode)) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                       _("USB source %s was not a character device"),
                       hostnode);
        goto cleanup;
    }

    if (virFileMakePath(busdir) < 0) {
        virReportSystemError(errno,
                             _("Unable to create %s"), busdir);
        goto cleanup;
    }

    VIR_DEBUG("Creating dev %s (%d,%d)",
              devnode, major(sb.st_rdev), minor(sb.st_rdev));
    if (mknod(devnode, nodemode, sb.st_rdev) < 0) {
        virReportSystemError(errno,
                             _("Unable to create device %s"),
                             devnode);
        goto cleanup;
    }

    if (lxcContainerChown(vmDef, devnode) < 0)
        goto cleanup;

    if (virSecurityManagerSetHostdevLabel(securityDriver,
                                          vmDef, def, usbroot) >= 0)
        ret = 0;

 cleanup:
    VIR_FREE(hostnode);
    VIR_FREE(devnode);
    VIR_FREE(busdir);
    VIR_FREE(usbroot);
    return ret;
}


/**
 * virLXCControllerSetupHostdevCapsStorage:
 * @vmDef: domain definition
 * @def: storage capability host device to expose
 * @securityDriver: security manager used to label the device
 *
 * Creates a block device node for the host block device named in @def
 * below /<LXC_STATE_DIR>/<domain name>.dev/. The location is the host
 * path with its first component removed (e.g. "/dev/sda" yields
 * ".../<name>.dev//sda"). The node is passed to lxcContainerChown() and
 * labelled; for the labelling call the path stored in @def is
 * temporarily swapped for the new node and restored before returning.
 *
 * Returns 0 on success, -1 on error.
 */
static int
virLXCControllerSetupHostdevCapsStorage(virDomainDefPtr vmDef,
                                        virDomainHostdevDefPtr def,
                                        virSecurityManagerPtr securityDriver)
{
    char *dst = NULL;
    char *path = NULL;
    const char *subpath;
    int len = 0;
    int ret = -1;
    struct stat sb;
    mode_t mode;
    char *dev = def->source.caps.u.storage.block;

    if (dev == NULL) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("Missing storage host block path"));
        goto cleanup;
    }

    if (VIR_STRDUP(path, dev) < 0)
        goto cleanup;

    /* Skip leading slashes, then drop the first path component */
    while (*(path + len) == '/')
        len++;

    /* Without a further '/' there is nothing to build the node path
     * from; bail out instead of handing NULL to %s */
    if (!(subpath = strchr(path + len, '/'))) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                       _("Unable to derive a container path from device %s"),
                       dev);
        goto cleanup;
    }

    if (virAsprintf(&dst, "/%s/%s.dev/%s",
                    LXC_STATE_DIR, vmDef->name,
                    subpath) < 0)
        goto cleanup;

    if (stat(dev, &sb) < 0) {
        virReportSystemError(errno,
                             _("Unable to access %s"),
                             dev);
        goto cleanup;
    }

    if (!S_ISBLK(sb.st_mode)) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                       _("Storage source %s must be a block device"),
                       dev);
        goto cleanup;
    }

    if (lxcContainerSetupHostdevCapsMakePath(dst) < 0) {
        /* errno is a system error number, not a libvirt error code */
        virReportSystemError(errno,
                             _("Failed to create directory for device %s"),
                             dev);
        goto cleanup;
    }

    mode = 0700 | S_IFBLK;

    VIR_DEBUG("Creating dev %s (%d,%d)", dst,
              major(sb.st_rdev), minor(sb.st_rdev));
    if (mknod(dst, mode, sb.st_rdev) < 0) {
        virReportSystemError(errno,
                             _("Unable to create device %s"),
                             dst);
        goto cleanup;
    }

    if (lxcContainerChown(vmDef, dst) < 0)
        goto cleanup;

    /* Labelling operates on the path stored in @def, so point it at
     * the new node for the duration of the call */
    def->source.caps.u.storage.block = dst;
    if (virSecurityManagerSetHostdevLabel(securityDriver, vmDef, def, NULL) < 0)
        goto cleanup;

    ret = 0;

 cleanup:
    def->source.caps.u.storage.block = dev;
    VIR_FREE(dst);
    VIR_FREE(path);
    return ret;
}


/**
 * virLXCControllerSetupHostdevCapsMisc:
 * @vmDef: domain definition
 * @def: misc capability host device to expose
 * @securityDriver: security manager used to label the device
 *
 * Creates a character device node for the host character device named
 * in @def below /<LXC_STATE_DIR>/<domain name>.dev/. The location is
 * the host path with its first component removed. The node is passed
 * to lxcContainerChown() and labelled; for the labelling call the path
 * stored in @def is temporarily swapped for the new node and restored
 * before returning.
 *
 * Returns 0 on success, -1 on error.
 */
static int
virLXCControllerSetupHostdevCapsMisc(virDomainDefPtr vmDef,
                                     virDomainHostdevDefPtr def,
                                     virSecurityManagerPtr securityDriver)
{
    char *dst = NULL;
    char *path = NULL;
    const char *subpath;
    int len = 0;
    int ret = -1;
    struct stat sb;
    mode_t mode;
    char *dev = def->source.caps.u.misc.chardev;

    if (dev == NULL) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("Missing storage host block path"));
        goto cleanup;
    }

    if (VIR_STRDUP(path, dev) < 0)
        goto cleanup;

    /* Skip leading slashes, then drop the first path component */
    while (*(path + len) == '/')
        len++;

    /* Without a further '/' there is nothing to build the node path
     * from; bail out instead of handing NULL to %s */
    if (!(subpath = strchr(path + len, '/'))) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                       _("Unable to derive a container path from device %s"),
                       dev);
        goto cleanup;
    }

    if (virAsprintf(&dst, "/%s/%s.dev/%s",
                    LXC_STATE_DIR, vmDef->name,
                    subpath) < 0)
        goto cleanup;

    if (stat(dev, &sb) < 0) {
        virReportSystemError(errno,
                             _("Unable to access %s"),
                             dev);
        goto cleanup;
    }

    if (!S_ISCHR(sb.st_mode)) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                       _("Storage source %s must be a character device"),
                       dev);
        goto cleanup;
    }

    if (lxcContainerSetupHostdevCapsMakePath(dst) < 0) {
        /* errno is a system error number, not a libvirt error code */
        virReportSystemError(errno,
                             _("Failed to create directory for device %s"),
                             dst);
        goto cleanup;
    }

    mode = 0700 | S_IFCHR;

    VIR_DEBUG("Creating dev %s (%d,%d)", dst,
              major(sb.st_rdev), minor(sb.st_rdev));
    if (mknod(dst, mode, sb.st_rdev) < 0) {
        /* Name the node that could not be created, not its source */
        virReportSystemError(errno,
                             _("Unable to create device %s"),
                             dst);
        goto cleanup;
    }

    if (lxcContainerChown(vmDef, dst) < 0)
        goto cleanup;

    /* Labelling operates on the path stored in @def, so point it at
     * the new node for the duration of the call */
    def->source.caps.u.misc.chardev = dst;
    if (virSecurityManagerSetHostdevLabel(securityDriver, vmDef, def, NULL) < 0)
        goto cleanup;

    ret = 0;

 cleanup:
    def->source.caps.u.misc.chardev = dev;
    VIR_FREE(dst);
    VIR_FREE(path);
    return ret;
}

/**
 * virLXCControllerSetupHostdevSubsys:
 * @vmDef: domain definition
 * @def: subsystem host device to expose
 * @securityDriver: security manager used to label the device
 *
 * Dispatches on the subsystem type of @def. Only USB is handled; any
 * other type is reported as unsupported.
 *
 * Returns the result of the USB setup, or -1 for an unsupported type.
 */
static int
virLXCControllerSetupHostdevSubsys(virDomainDefPtr vmDef,
                                   virDomainHostdevDefPtr def,
                                   virSecurityManagerPtr securityDriver)
{
    if (def->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_USB)
        return virLXCControllerSetupHostdevSubsysUSB(vmDef, def, securityDriver);

    virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                   _("Unsupported host device mode %s"),
                   virDomainHostdevSubsysTypeToString(def->source.subsys.type));
    return -1;
}


/**
 * virLXCControllerSetupHostdevCaps:
 * @vmDef: domain definition
 * @def: capability host device to expose
 * @securityDriver: security manager used to label the device
 *
 * Dispatches on the capability type of @def: storage and misc devices
 * get a device node created for them, network devices need nothing
 * here, and any other type is reported as unsupported.
 *
 * Returns 0 on success, -1 on error or for an unsupported type.
 */
static int
virLXCControllerSetupHostdevCaps(virDomainDefPtr vmDef,
                                 virDomainHostdevDefPtr def,
                                 virSecurityManagerPtr securityDriver)
{
    /* The case labels are capability types, so read the capability
     * view of the source rather than the subsystem one */
    switch (def->source.caps.type) {
    case VIR_DOMAIN_HOSTDEV_CAPS_TYPE_STORAGE:
        return virLXCControllerSetupHostdevCapsStorage(vmDef,
                                                       def,
                                                       securityDriver);

    case VIR_DOMAIN_HOSTDEV_CAPS_TYPE_MISC:
        return virLXCControllerSetupHostdevCapsMisc(vmDef,
                                                    def,
                                                    securityDriver);

    case VIR_DOMAIN_HOSTDEV_CAPS_TYPE_NET:
        /* Handled in virLXCControllerMoveInterfaces */
        return 0;

    default:
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                       _("Unsupported host device mode %s"),
                       virDomainHostdevCapsTypeToString(def->source.caps.type));
        return -1;
    }
}


/**
 * virLXCControllerSetupAllHostdevs:
 * @ctrl: the LXC controller
 *
 * Walks every host device of the domain definition and sets it up
 * according to its mode (subsystem or capabilities). Processing stops
 * at the first failure or at the first device with an unsupported mode.
 *
 * Returns 0 if all host devices were set up, -1 otherwise.
 */
static int
virLXCControllerSetupAllHostdevs(virLXCControllerPtr ctrl)
{
    virDomainDefPtr vmDef = ctrl->def;
    virSecurityManagerPtr securityDriver = ctrl->securityManager;
    size_t idx;

    VIR_DEBUG("Setting up hostdevs");

    for (idx = 0; idx < vmDef->nhostdevs; idx++) {
        virDomainHostdevDefPtr hostdev = vmDef->hostdevs[idx];
        int rc;

        switch (hostdev->mode) {
        case VIR_DOMAIN_HOSTDEV_MODE_SUBSYS:
            rc = virLXCControllerSetupHostdevSubsys(vmDef, hostdev,
                                                    securityDriver);
            break;
        case VIR_DOMAIN_HOSTDEV_MODE_CAPABILITIES:
            rc = virLXCControllerSetupHostdevCaps(vmDef, hostdev,
                                                  securityDriver);
            break;
        default:
            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                           _("Unsupported host device mode %s"),
                           virDomainHostdevModeTypeToString(hostdev->mode));
            return -1;
        }

        if (rc < 0)
            return -1;
    }

    VIR_DEBUG("Setup all hostdevs");
    return 0;
}


1775 1776 1777 1778 1779 1780 1781 1782
/**
 * virLXCControllerSetupDisk:
 * @ctrl: the LXC controller
 * @def: disk to expose
 * @securityDriver: security manager used to label the device
 *
 * Creates a device node for a block-type disk at
 * /<LXC_STATE_DIR>/<domain name>.dev/<disk target>, using the device
 * number of the disk's source path. The node is passed to
 * lxcContainerChown() and labelled; for the labelling call the source
 * path in @def is temporarily swapped for the new node and restored
 * before returning.
 *
 * Returns 0 on success, -1 on error.
 */
static int virLXCControllerSetupDisk(virLXCControllerPtr ctrl,
                                     virDomainDiskDefPtr def,
                                     virSecurityManagerPtr securityDriver)
{
    char *origsrc = def->src->path;
    char *node = NULL;
    struct stat sb;
    mode_t nodemode;
    int ret = -1;

    if (virDomainDiskGetType(def) != VIR_STORAGE_TYPE_BLOCK) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("Can't setup disk for non-block device"));
        goto cleanup;
    }

    if (origsrc == NULL) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                       _("Can't setup disk without media"));
        goto cleanup;
    }

    if (virAsprintf(&node, "/%s/%s.dev/%s",
                    LXC_STATE_DIR, ctrl->def->name, def->dst) < 0)
        goto cleanup;

    if (stat(origsrc, &sb) < 0) {
        virReportSystemError(errno,
                             _("Unable to access %s"), origsrc);
        goto cleanup;
    }

    if (!S_ISCHR(sb.st_mode) && !S_ISBLK(sb.st_mode)) {
        virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                       _("Disk source %s must be a character/block device"),
                       origsrc);
        goto cleanup;
    }

    /* Same kind of node as the source, accessible to the owner only */
    nodemode = 0700 | (S_ISCHR(sb.st_mode) ? S_IFCHR : S_IFBLK);

    /* Yes, the device name we're creating may not
     * actually correspond to the major:minor number
     * we're using, but we've no other option at this
     * time. Just have to hope that containerized apps
     * don't get upset that the major:minor is different
     * to that normally implied by the device name
     */
    VIR_DEBUG("Creating dev %s (%d,%d) from %s",
              node, major(sb.st_rdev), minor(sb.st_rdev), origsrc);
    if (mknod(node, nodemode, sb.st_rdev) < 0) {
        virReportSystemError(errno,
                             _("Unable to create device %s"),
                             node);
        goto cleanup;
    }

    if (lxcContainerChown(ctrl->def, node) < 0)
        goto cleanup;

    /* Labelling normally operates on src, but we need
     * to actually label the dst here, so hack the config */
    def->src->path = node;
    if (virSecurityManagerSetDiskLabel(securityDriver, ctrl->def, def) >= 0)
        ret = 0;

 cleanup:
    def->src->path = origsrc;
    VIR_FREE(node);
    return ret;
}

/**
 * virLXCControllerSetupAllDisks:
 * @ctrl: the LXC controller
 *
 * Calls virLXCControllerSetupDisk() for each entry of ctrl->def->disks,
 * in index order, passing ctrl->securityManager along. Iteration stops
 * at the first disk whose setup fails.
 *
 * Returns 0 on success or -1 in case of error
 */
static int virLXCControllerSetupAllDisks(virLXCControllerPtr ctrl)
{
    size_t idx = 0;

    VIR_DEBUG("Setting up disks");

    while (idx < ctrl->def->ndisks) {
        if (virLXCControllerSetupDisk(ctrl, ctrl->def->disks[idx],
                                      ctrl->securityManager) < 0)
            return -1;
        idx++;
    }

    VIR_DEBUG("Setup all disks");
    return 0;
}



1869
/**
1870
 * virLXCControllerMoveInterfaces
1871 1872 1873 1874 1875 1876 1877 1878
 * @nveths: number of interfaces
 * @veths: interface names
 * @container: pid of container
 *
 * Moves network interfaces into a container's namespace
 *
 * Returns 0 on success or -1 in case of error
 */
1879
static int virLXCControllerMoveInterfaces(virLXCControllerPtr ctrl)
1880
{
1881
    size_t i;
1882
    virDomainDefPtr def = ctrl->def;
1883

1884
    for (i = 0; i < ctrl->nveths; i++) {
1885
        if (virNetDevSetNamespace(ctrl->veths[i], ctrl->initpid) < 0)
1886
            return -1;
1887
    }
1888

1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903
    for (i = 0; i < def->nhostdevs; i ++) {
        virDomainHostdevDefPtr hdev = def->hostdevs[i];

        if (hdev->mode != VIR_DOMAIN_HOSTDEV_MODE_CAPABILITIES)
            continue;

        virDomainHostdevCaps hdcaps = hdev->source.caps;

        if (hdcaps.type != VIR_DOMAIN_HOSTDEV_CAPS_TYPE_NET)
           continue;

        if (virNetDevSetNamespace(hdcaps.u.net.iface, ctrl->initpid) < 0)
            return -1;
    }

1904 1905 1906 1907 1908
    return 0;
}


/**
1909 1910
 * virLXCControllerDeleteInterfaces:
 * @ctrl: the LXC controller
1911 1912 1913 1914 1915
 *
 * Cleans up the container interfaces by deleting the veth device pairs.
 *
 * Returns 0 on success or -1 in case of error
 */
1916
static int virLXCControllerDeleteInterfaces(virLXCControllerPtr ctrl)
1917
{
1918 1919
    size_t i;
    int ret = 0;
1920

1921
    for (i = 0; i < ctrl->nveths; i++) {
1922 1923 1924 1925 1926
        if (virNetDevVethDelete(ctrl->veths[i]) < 0)
            ret = -1;
    }

    return ret;
1927 1928
}

1929

1930 1931
/**
 * lxcSetPersonality:
 * @def: the domain definition
 *
 * Asks lxcContainerGetAlt32bitArch() for the alternate arch of the
 * host and, when that value is non-zero and equals def->os.arch,
 * switches the calling process to PER_LINUX32 via personality().
 * In every other case nothing is changed.
 *
 * Returns 0 on success (including "nothing to do") or -1 if
 * personality() fails, after reporting the error
 */
static int lxcSetPersonality(virDomainDefPtr def)
{
    virArch altArch;

    VIR_DEBUG("Checking for 32-bit personality");
    altArch = lxcContainerGetAlt32bitArch(virArchFromHost());

    /* No alternate arch, or the guest does not use it: leave as is */
    if (!altArch || def->os.arch != altArch)
        return 0;

    VIR_DEBUG("Setting personality to %s",
              virArchToString(altArch));

    if (personality(PER_LINUX32) >= 0)
        return 0;

    virReportSystemError(errno, _("Unable to request personality for %s on %s"),
                         virArchToString(altArch),
                         virArchToString(virArchFromHost()));
    return -1;
}

1950
/* Fallback values for mount(2) flags, used only when the included
 * headers have not already defined them */
#if !defined(MS_REC)
# define MS_REC          (1<<14)
#endif

#if !defined(MS_SLAVE)
# define MS_SLAVE              (1<<19)
#endif
1957

1958 1959 1960 1961 1962 1963
/* Create a private tty using the private devpts at PTMX, returning
 * the master in *TTYMASTER and the name of the slave, _from the
 * perspective of the guest after remounting file systems_, in
 * *TTYNAME.  Heavily borrowed from glibc, but doesn't require that
 * devpts == "/dev/pts" */
static int
1964 1965
lxcCreateTty(virLXCControllerPtr ctrl, int *ttymaster,
             char **ttyName, char **ttyHostPath)
1966 1967 1968 1969 1970
{
    int ret = -1;
    int ptyno;
    int unlock = 0;

1971
    if ((*ttymaster = open(ctrl->devptmx, O_RDWR|O_NOCTTY|O_NONBLOCK)) < 0)
1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985
        goto cleanup;

    if (ioctl(*ttymaster, TIOCSPTLCK, &unlock) < 0)
        goto cleanup;

    if (ioctl(*ttymaster, TIOCGPTN, &ptyno) < 0)
        goto cleanup;

    /* If mount() succeeded at honoring newinstance, then the kernel
     * was new enough to also honor the mode=0620,gid=5 options, which
     * guarantee that the new pty already has correct permissions; so
     * while glibc has to fstat(), fchmod(), and fchown() for older
     * kernels, we can skip those steps.  ptyno shouldn't currently be
     * anything other than 0, but let's play it safe.  */
1986 1987 1988
    if ((virAsprintf(ttyName, "/dev/pts/%d", ptyno) < 0) ||
        (virAsprintf(ttyHostPath, "/%s/%s.devpts/%d", LXC_STATE_DIR,
                    ctrl->def->name, ptyno) < 0)) {
1989 1990 1991 1992 1993 1994
        errno = ENOMEM;
        goto cleanup;
    }

    ret = 0;

1995
 cleanup:
1996 1997 1998 1999 2000 2001 2002 2003
    if (ret != 0) {
        VIR_FORCE_CLOSE(*ttymaster);
        VIR_FREE(*ttyName);
    }

    return ret;
}

2004

2005 2006 2007 2008 2009
static int
virLXCControllerSetupPrivateNS(void)
{
    int ret = -1;

2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030
    /*
     * If doing a chroot style setup, we need to prepare
     * a private /dev/pts for the child now, which they
     * will later move into position.
     *
     * This is complex because 'virsh console' needs to
     * use /dev/pts from the host OS, and the guest OS
     * needs to use /dev/pts from the guest.
     *
     * This means that we (libvirt_lxc) need to see and
     * use both /dev/pts instances. We're running in the
     * host OS context though and don't want to expose
     * the guest OS /dev/pts there.
     *
     * Thus we call unshare(CLONE_NS) so that we can see
     * the guest's new /dev/pts, without it becoming
     * visible to the host OS. We also put the root FS
     * into slave mode, just in case it was currently
     * marked as shared
     */

2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043
    if (unshare(CLONE_NEWNS) < 0) {
        virReportSystemError(errno, "%s",
                             _("Cannot unshare mount namespace"));
        goto cleanup;
    }

    if (mount("", "/", NULL, MS_SLAVE|MS_REC, NULL) < 0) {
        virReportSystemError(errno, "%s",
                             _("Failed to switch root mount into slave mode"));
        goto cleanup;
    }

    ret = 0;
2044
 cleanup:
2045 2046 2047 2048
    return ret;
}


2049
static int
2050
virLXCControllerSetupDevPTS(virLXCControllerPtr ctrl)
2051
{
2052
    char *mount_options = NULL;
2053
    char *opts = NULL;
2054 2055
    char *devpts = NULL;
    int ret = -1;
2056
    gid_t ptsgid = 5;
2057

2058
    VIR_DEBUG("Setting up private /dev/pts");
2059

2060 2061
    mount_options = virSecurityManagerGetMountOptions(ctrl->securityManager,
                                                      ctrl->def);
2062

2063 2064 2065
    if (virAsprintf(&devpts, "%s/%s.devpts",
                    LXC_STATE_DIR, ctrl->def->name) < 0 ||
        virAsprintf(&ctrl->devptmx, "%s/%s.devpts/ptmx",
2066
                    LXC_STATE_DIR, ctrl->def->name) < 0)
2067
        goto cleanup;
2068

2069 2070 2071 2072 2073 2074
    if (virFileMakePath(devpts) < 0) {
        virReportSystemError(errno,
                             _("Failed to make path %s"),
                             devpts);
        goto cleanup;
    }
2075

2076 2077 2078 2079 2080
    if (ctrl->def->idmap.ngidmap)
        ptsgid = virLXCControllerLookupUsernsMap(ctrl->def->idmap.gidmap,
                                                 ctrl->def->idmap.ngidmap,
                                                 ptsgid);

2081 2082
    /* XXX should we support gid=X for X!=5 for distros which use
     * a different gid for tty?  */
2083 2084
    if (virAsprintf(&opts, "newinstance,ptmxmode=0666,mode=0620,gid=%u%s",
                    ptsgid, (mount_options ? mount_options : "")) < 0)
2085
        goto cleanup;
2086

2087 2088 2089 2090 2091 2092 2093 2094
    VIR_DEBUG("Mount devpts on %s type=tmpfs flags=%x, opts=%s",
              devpts, MS_NOSUID, opts);
    if (mount("devpts", devpts, "devpts", MS_NOSUID, opts) < 0) {
        virReportSystemError(errno,
                             _("Failed to mount devpts on %s"),
                             devpts);
        goto cleanup;
    }
2095

2096
    if (access(ctrl->devptmx, R_OK) < 0) {
2097 2098 2099
        virReportSystemError(ENOSYS, "%s",
                             _("Kernel does not support private devpts"));
        goto cleanup;
2100 2101
    }

2102 2103
    if ((lxcContainerChown(ctrl->def, ctrl->devptmx) < 0) ||
        (lxcContainerChown(ctrl->def, devpts) < 0))
2104 2105
         goto cleanup;

2106 2107
    ret = 0;

2108
 cleanup:
2109 2110
    VIR_FREE(opts);
    VIR_FREE(devpts);
2111
    VIR_FREE(mount_options);
2112 2113 2114 2115
    return ret;
}


G
Gao feng 已提交
2116 2117 2118 2119 2120 2121
/**
 * virLXCControllerSetupFuse:
 * @ctrl: the LXC controller
 *
 * Thin wrapper that hands &ctrl->fuse and ctrl->def to lxcSetupFuse().
 *
 * Returns the value returned by lxcSetupFuse()
 */
static int
virLXCControllerSetupFuse(virLXCControllerPtr ctrl)
{
    int rc = lxcSetupFuse(&ctrl->fuse, ctrl->def);

    return rc;
}

2122 2123 2124 2125 2126 2127
/**
 * virLXCControllerStartFuse:
 * @ctrl: the LXC controller
 *
 * Thin wrapper that hands ctrl->fuse to lxcStartFuse().
 *
 * Returns the value returned by lxcStartFuse()
 */
static int
virLXCControllerStartFuse(virLXCControllerPtr ctrl)
{
    int rc = lxcStartFuse(ctrl->fuse);

    return rc;
}

2128 2129 2130 2131 2132
/**
 * virLXCControllerSetupConsoles:
 * @ctrl: the LXC controller
 * @containerTTYPaths: array with room for ctrl->nconsoles strings
 *
 * For each console, allocates a tty with lxcCreateTty(), storing the
 * master fd in ctrl->consoles[i].contFd and the tty name in
 * @containerTTYPaths[i], then passes the tty's host-side path to
 * lxcContainerChown(). The host-side path is freed again before
 * moving on. Stops at the first failure.
 *
 * Returns 0 on success or -1 in case of error
 */
static int
virLXCControllerSetupConsoles(virLXCControllerPtr ctrl,
                              char **containerTTYPaths)
{
    size_t idx;
    char *hostPath = NULL;

    for (idx = 0; idx < ctrl->nconsoles; idx++) {
        int chownrc;

        VIR_DEBUG("Opening tty on private %s", ctrl->devptmx);
        if (lxcCreateTty(ctrl,
                         &ctrl->consoles[idx].contFd,
                         &containerTTYPaths[idx], &hostPath) < 0) {
            virReportSystemError(errno, "%s",
                                 _("Failed to allocate tty"));
            VIR_FREE(hostPath);
            return -1;
        }

        /* Change the owner of tty device to the root user of container */
        chownrc = lxcContainerChown(ctrl->def, hostPath);

        /* The host path is only needed for the chown above */
        VIR_FREE(hostPath);
        if (chownrc < 0)
            return -1;
    }

    return 0;
}


2160 2161 2162 2163 2164 2165 2166 2167
/**
 * virLXCControllerEventSend:
 * @ctrl: the LXC controller
 * @procnr: procedure number placed in the message header
 * @proc: XDR routine used to encode (and later free) @data
 * @data: payload to encode
 *
 * Builds a VIR_NET_MESSAGE for @procnr, encodes @data into it with
 * @proc and queues it on ctrl->client. If no client is connected the
 * event is dropped with a warning and @data is left untouched. In all
 * other cases, successful or not, @data is passed to xdr_free().
 *
 * A message that could not be encoded or queued is freed here rather
 * than leaked.
 */
static void
virLXCControllerEventSend(virLXCControllerPtr ctrl,
                          int procnr,
                          xdrproc_t proc,
                          void *data)
{
    virNetMessagePtr msg;

    if (!ctrl->client) {
        VIR_WARN("Dropping event %d because libvirtd is not connected", procnr);
        return;
    }

    VIR_DEBUG("Send event %d client=%p", procnr, ctrl->client);
    if (!(msg = virNetMessageNew(false)))
        goto error;

    msg->header.prog = virNetServerProgramGetID(ctrl->prog);
    msg->header.vers = virNetServerProgramGetVersion(ctrl->prog);
    msg->header.proc = procnr;
    msg->header.type = VIR_NET_MESSAGE;
    msg->header.serial = 1;
    msg->header.status = VIR_NET_OK;

    if (virNetMessageEncodeHeader(msg) < 0)
        goto error;

    if (virNetMessageEncodePayload(msg, proc, data) < 0)
        goto error;

    VIR_DEBUG("Queue event %d %zu", procnr, msg->bufferLength);
    /* NOTE(review): assumes a failed send leaves ownership of msg with
     * the caller, so it must be released on the error path - confirm
     * against virNetServerClientSendMessage() */
    if (virNetServerClientSendMessage(ctrl->client, msg) < 0)
        goto error;

    xdr_free(proc, data);
    return;

 error:
    virNetMessageFree(msg);
    xdr_free(proc, data);
}


/**
 * virLXCControllerEventSendExit:
 * @ctrl: the LXC controller
 * @exitstatus: status to translate into the exit event
 *
 * Sends an exit event whose status is SHUTDOWN for @exitstatus 0,
 * REBOOT for @exitstatus 1 and ERROR for anything else. If a client
 * is connected, ctrl->inShutdown is then set, a delayed close of the
 * client is requested and virNetServerRun() is entered on
 * ctrl->server.
 *
 * Returns 0 always
 */
static int
virLXCControllerEventSendExit(virLXCControllerPtr ctrl,
                              int exitstatus)
{
    virLXCMonitorExitEventMsg event;

    VIR_DEBUG("Exit status %d (client=%p)", exitstatus, ctrl->client);

    memset(&event, 0, sizeof(event));
    if (exitstatus == 0)
        event.status = VIR_LXC_MONITOR_EXIT_STATUS_SHUTDOWN;
    else if (exitstatus == 1)
        event.status = VIR_LXC_MONITOR_EXIT_STATUS_REBOOT;
    else
        event.status = VIR_LXC_MONITOR_EXIT_STATUS_ERROR;

    virLXCControllerEventSend(ctrl,
                              VIR_LXC_MONITOR_PROC_EXIT_EVENT,
                              (xdrproc_t)xdr_virLXCMonitorExitEventMsg,
                              &event);

    if (ctrl->client) {
        VIR_DEBUG("Waiting for client to complete dispatch");
        ctrl->inShutdown = true;
        virNetServerClientDelayedClose(ctrl->client);
        virNetServerRun(ctrl->server);
    }

    VIR_DEBUG("Client has gone away");
    return 0;
}


2238 2239 2240 2241
/**
 * virLXCControllerEventSendInit:
 * @ctrl: the LXC controller
 * @initpid: pid to place in the init event
 *
 * Sends an init event carrying @initpid through
 * virLXCControllerEventSend().
 *
 * Returns 0 always
 */
static int
virLXCControllerEventSendInit(virLXCControllerPtr ctrl,
                              pid_t initpid)
{
    virLXCMonitorInitEventMsg event;

    VIR_DEBUG("Init pid %llu", (unsigned long long)initpid);

    memset(&event, 0, sizeof(event));
    event.initpid = initpid;

    virLXCControllerEventSend(ctrl,
                              VIR_LXC_MONITOR_PROC_INIT_EVENT,
                              (xdrproc_t)xdr_virLXCMonitorInitEventMsg,
                              &event);
    return 0;
}


2256
static int
2257
virLXCControllerRun(virLXCControllerPtr ctrl)
2258 2259 2260 2261 2262 2263 2264
{
    int rc = -1;
    int control[2] = { -1, -1};
    int containerhandshake[2] = { -1, -1 };
    char **containerTTYPaths = NULL;
    size_t i;

2265
    if (VIR_ALLOC_N(containerTTYPaths, ctrl->nconsoles) < 0)
2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279
        goto cleanup;

    if (socketpair(PF_UNIX, SOCK_STREAM, 0, control) < 0) {
        virReportSystemError(errno, "%s",
                             _("sockpair failed"));
        goto cleanup;
    }

    if (socketpair(PF_UNIX, SOCK_STREAM, 0, containerhandshake) < 0) {
        virReportSystemError(errno, "%s",
                             _("socketpair failed"));
        goto cleanup;
    }

2280 2281 2282
    if (virLXCControllerSetupPrivateNS() < 0)
        goto cleanup;

2283 2284 2285
    if (virLXCControllerSetupLoopDevices(ctrl) < 0)
        goto cleanup;

2286
    if (virLXCControllerSetupResourceLimits(ctrl) < 0)
2287 2288 2289 2290 2291
        goto cleanup;

    if (virLXCControllerSetupDevPTS(ctrl) < 0)
        goto cleanup;

2292 2293 2294
    if (virLXCControllerPopulateDevices(ctrl) < 0)
        goto cleanup;

2295 2296 2297
    if (virLXCControllerSetupAllDisks(ctrl) < 0)
        goto cleanup;

2298 2299 2300
    if (virLXCControllerSetupAllHostdevs(ctrl) < 0)
        goto cleanup;

G
Gao feng 已提交
2301 2302 2303
    if (virLXCControllerSetupFuse(ctrl) < 0)
        goto cleanup;

2304 2305
    if (virLXCControllerSetupConsoles(ctrl, containerTTYPaths) < 0)
        goto cleanup;
2306

2307
    if (lxcSetPersonality(ctrl->def) < 0)
2308
        goto cleanup;
2309

2310
    if ((ctrl->initpid = lxcContainerStart(ctrl->def,
2311
                                           ctrl->securityManager,
2312 2313
                                           ctrl->nveths,
                                           ctrl->veths,
2314 2315
                                           ctrl->npassFDs,
                                           ctrl->passFDs,
2316 2317
                                           control[1],
                                           containerhandshake[1],
2318 2319
                                           ctrl->nconsoles,
                                           containerTTYPaths)) < 0)
2320
        goto cleanup;
2321
    VIR_FORCE_CLOSE(control[1]);
2322
    VIR_FORCE_CLOSE(containerhandshake[1]);
2323

2324 2325 2326
    for (i = 0; i < ctrl->npassFDs; i++)
        VIR_FORCE_CLOSE(ctrl->passFDs[i]);

2327 2328 2329
    if (virLXCControllerSetupCgroupLimits(ctrl) < 0)
        goto cleanup;

2330 2331 2332
    if (virLXCControllerSetupUserns(ctrl) < 0)
        goto cleanup;

2333
    if (virLXCControllerMoveInterfaces(ctrl) < 0)
2334 2335
        goto cleanup;

2336 2337 2338
    if (virLXCControllerStartFuse(ctrl) < 0)
        goto cleanup;

2339 2340 2341
    if (lxcContainerSendContinue(control[0]) < 0) {
        virReportSystemError(errno, "%s",
                             _("Unable to send container continue message"));
2342
        goto cleanup;
2343
    }
2344

2345 2346 2347 2348 2349 2350
    if (lxcContainerWaitForContinue(containerhandshake[0]) < 0) {
        virReportSystemError(errno, "%s",
                             _("error receiving signal from container"));
        goto cleanup;
    }

2351
    /* ...and reduce our privileges */
D
Daniel P. Berrange 已提交
2352 2353 2354
    if (lxcControllerClearCapabilities() < 0)
        goto cleanup;

2355
    if (virLXCControllerDaemonHandshake(ctrl) < 0)
2356 2357
        goto cleanup;

2358
    for (i = 0; i < ctrl->nconsoles; i++)
2359
        if (virLXCControllerConsoleSetNonblocking(&(ctrl->consoles[i])) < 0)
2360
            goto cleanup;
2361

2362 2363 2364 2365 2366 2367
    /* We must not hold open a dbus connection for life
     * of LXC instance, since dbus-daemon is limited to
     * only a few 100 connections by default
     */
    virDBusCloseSystemBus();

2368
    rc = virLXCControllerMain(ctrl);
2369

2370 2371
    virLXCControllerEventSendExit(ctrl, rc);

2372
 cleanup:
2373 2374
    VIR_FORCE_CLOSE(control[0]);
    VIR_FORCE_CLOSE(control[1]);
2375 2376
    VIR_FORCE_CLOSE(containerhandshake[0]);
    VIR_FORCE_CLOSE(containerhandshake[1]);
2377

2378
    for (i = 0; i < ctrl->nconsoles; i++)
2379 2380
        VIR_FREE(containerTTYPaths[i]);
    VIR_FREE(containerTTYPaths);
2381

2382
    virLXCControllerStopInit(ctrl);
2383

2384 2385 2386 2387
    return rc;
}


2388
int main(int argc, char *argv[])
2389 2390
{
    pid_t pid;
2391
    int rc = -1;
2392
    const char *name = NULL;
2393
    size_t nveths = 0;
2394
    char **veths = NULL;
2395
    int handshakeFd = -1;
2396
    bool bg = false;
2397
    const struct option options[] = {
2398 2399 2400 2401
        { "background", 0, NULL, 'b' },
        { "name",   1, NULL, 'n' },
        { "veth",   1, NULL, 'v' },
        { "console", 1, NULL, 'c' },
2402
        { "passfd", 1, NULL, 'p' },
2403
        { "handshakefd", 1, NULL, 's' },
2404
        { "security", 1, NULL, 'S' },
2405 2406 2407
        { "help", 0, NULL, 'h' },
        { 0, 0, 0, 0 },
    };
2408 2409
    int *ttyFDs = NULL;
    size_t nttyFDs = 0;
2410 2411
    int *passFDs = NULL;
    size_t npassFDs = 0;
2412
    virLXCControllerPtr ctrl = NULL;
2413
    size_t i;
2414
    const char *securityDriver = "none";
2415

E
Eric Blake 已提交
2416 2417
    if (setlocale(LC_ALL, "") == NULL ||
        bindtextdomain(PACKAGE, LOCALEDIR) == NULL ||
2418 2419 2420
        textdomain(PACKAGE) == NULL ||
        virThreadInitialize() < 0 ||
        virErrorInitialize() < 0) {
E
Eric Blake 已提交
2421 2422 2423 2424
        fprintf(stderr, _("%s: initialization failed\n"), argv[0]);
        exit(EXIT_FAILURE);
    }

2425 2426 2427
    /* Initialize logging */
    virLogSetFromEnv();

2428 2429
    while (1) {
        int c;
2430

2431
        c = getopt_long(argc, argv, "dn:v:p:m:c:s:h:S:",
2432 2433 2434 2435 2436 2437 2438
                       options, NULL);

        if (c == -1)
            break;

        switch (c) {
        case 'b':
2439
            bg = true;
2440 2441 2442
            break;

        case 'n':
2443
            name = optarg;
2444 2445 2446
            break;

        case 'v':
2447
            if (VIR_REALLOC_N(veths, nveths+1) < 0)
2448
                goto cleanup;
2449
            if (VIR_STRDUP(veths[nveths++], optarg) < 0)
2450 2451 2452 2453
                goto cleanup;
            break;

        case 'c':
2454
            if (VIR_REALLOC_N(ttyFDs, nttyFDs + 1) < 0)
2455 2456
                goto cleanup;
            if (virStrToLong_i(optarg, NULL, 10, &ttyFDs[nttyFDs++]) < 0) {
2457 2458 2459 2460 2461
                fprintf(stderr, "malformed --console argument '%s'", optarg);
                goto cleanup;
            }
            break;

2462 2463 2464 2465 2466 2467 2468 2469 2470
        case 'p':
            if (VIR_REALLOC_N(passFDs, npassFDs + 1) < 0)
                goto cleanup;
            if (virStrToLong_i(optarg, NULL, 10, &passFDs[npassFDs++]) < 0) {
                fprintf(stderr, "malformed --passfd argument '%s'", optarg);
                goto cleanup;
            }
            break;

2471
        case 's':
2472
            if (virStrToLong_i(optarg, NULL, 10, &handshakeFd) < 0) {
2473 2474 2475 2476 2477 2478
                fprintf(stderr, "malformed --handshakefd argument '%s'",
                        optarg);
                goto cleanup;
            }
            break;

2479
        case 'S':
2480
            securityDriver = optarg;
2481 2482
            break;

2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493
        case 'h':
        case '?':
            fprintf(stderr, "\n");
            fprintf(stderr, "syntax: %s [OPTIONS]\n", argv[0]);
            fprintf(stderr, "\n");
            fprintf(stderr, "Options\n");
            fprintf(stderr, "\n");
            fprintf(stderr, "  -b, --background\n");
            fprintf(stderr, "  -n NAME, --name NAME\n");
            fprintf(stderr, "  -c FD, --console FD\n");
            fprintf(stderr, "  -v VETH, --veth VETH\n");
2494
            fprintf(stderr, "  -s FD, --handshakefd FD\n");
2495
            fprintf(stderr, "  -S NAME, --security NAME\n");
2496 2497 2498
            fprintf(stderr, "  -h, --help\n");
            fprintf(stderr, "\n");
            goto cleanup;
2499 2500 2501
        }
    }

2502 2503 2504 2505 2506
    if (name == NULL) {
        fprintf(stderr, "%s: missing --name argument for configuration\n", argv[0]);
        goto cleanup;
    }

2507
    if (handshakeFd < 0) {
2508
        fprintf(stderr, "%s: missing --handshakefd argument for container PTY\n",
2509 2510 2511 2512
                argv[0]);
        goto cleanup;
    }

2513
    if (geteuid() != 0) {
2514 2515 2516
        fprintf(stderr, "%s: must be run as the 'root' user\n", argv[0]);
        goto cleanup;
    }
2517

2518
    virEventRegisterDefaultImpl();
2519 2520

    virDBusSetSharedBus(false);
2521

2522
    if (!(ctrl = virLXCControllerNew(name)))
2523
        goto cleanup;
2524

2525 2526
    ctrl->handshakeFd = handshakeFd;

2527 2528 2529 2530 2531
    if (!(ctrl->securityManager = virSecurityManagerNew(securityDriver,
                                                        LXC_DRIVER_NAME,
                                                        false, false, false)))
        goto cleanup;

2532 2533 2534 2535 2536 2537 2538 2539 2540
    if (ctrl->def->seclabels) {
        VIR_DEBUG("Security model %s type %s label %s imagelabel %s",
                  NULLSTR(ctrl->def->seclabels[0]->model),
                  virDomainSeclabelTypeToString(ctrl->def->seclabels[0]->type),
                  NULLSTR(ctrl->def->seclabels[0]->label),
                  NULLSTR(ctrl->def->seclabels[0]->imagelabel));
    } else {
        VIR_DEBUG("Security model not initialized");
    }
2541

2542 2543 2544
    ctrl->veths = veths;
    ctrl->nveths = nveths;

2545 2546 2547
    ctrl->passFDs = passFDs;
    ctrl->npassFDs = npassFDs;

2548
    for (i = 0; i < nttyFDs; i++) {
2549 2550 2551 2552 2553
        if (virLXCControllerAddConsole(ctrl, ttyFDs[i]) < 0)
            goto cleanup;
        ttyFDs[i] = -1;
    }

2554
    if (virLXCControllerValidateNICs(ctrl) < 0)
2555
        goto cleanup;
2556

2557 2558 2559
    if (virLXCControllerGetNICIndexes(ctrl) < 0)
        goto cleanup;

2560 2561 2562
    if (virLXCControllerValidateConsoles(ctrl) < 0)
        goto cleanup;

2563
    if (virLXCControllerSetupServer(ctrl) < 0)
2564
        goto cleanup;
2565

2566 2567 2568
    if (bg) {
        if ((pid = fork()) < 0)
            goto cleanup;
2569

2570
        if (pid > 0) {
2571
            if ((rc = virPidFileWrite(LXC_STATE_DIR, name, pid)) < 0) {
2572
                virReportSystemError(-rc,
2573 2574
                                     _("Unable to write pid file '%s/%s.pid'"),
                                     LXC_STATE_DIR, name);
2575 2576
                _exit(1);
            }
2577

2578 2579 2580 2581
            /* First child now exits, allowing original caller
             * (ie libvirtd's LXC driver to complete their
             * waitpid & continue */
            _exit(0);
2582 2583
        }

E
Eric Blake 已提交
2584
        /* Don't hold on to any cwd we inherit from libvirtd either */
2585
        if (chdir("/") < 0) {
2586
            virReportSystemError(errno, "%s",
2587
                                 _("Unable to change to root dir"));
2588 2589 2590 2591
            goto cleanup;
        }

        if (setsid() < 0) {
2592
            virReportSystemError(errno, "%s",
2593
                                 _("Unable to become session leader"));
2594 2595 2596
            goto cleanup;
        }
    }
2597

2598
    rc = virLXCControllerRun(ctrl);
2599

2600
 cleanup:
2601 2602 2603 2604 2605 2606 2607 2608 2609
    if (rc < 0) {
        virErrorPtr err = virGetLastError();
        if (err && err->message)
            fprintf(stderr, "%s\n", err->message);
        else
            fprintf(stderr, "%s\n",
                    _("Unknown failure in libvirt_lxc startup"));
    }

2610
    virPidFileDelete(LXC_STATE_DIR, name);
2611 2612
    if (ctrl)
        virLXCControllerDeleteInterfaces(ctrl);
2613
    for (i = 0; i < nttyFDs; i++)
2614 2615
        VIR_FORCE_CLOSE(ttyFDs[i]);
    VIR_FREE(ttyFDs);
2616 2617 2618
    for (i = 0; i < npassFDs; i++)
        VIR_FORCE_CLOSE(passFDs[i]);
    VIR_FREE(passFDs);
2619

2620
    virLXCControllerFree(ctrl);
2621

2622
    return rc < 0? EXIT_FAILURE : EXIT_SUCCESS;
2623
}