bridge_driver.c 137.6 KB
Newer Older
1
/*
2
 * bridge_driver.c: core driver methods for managing network
3
 *
4
 * Copyright (C) 2006-2012 Red Hat, Inc.
5 6 7 8 9 10 11 12 13 14 15 16 17
 * Copyright (C) 2006 Daniel P. Berrange
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with this library.  If not, see
O
Osier Yang 已提交
19
 * <http://www.gnu.org/licenses/>.
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
 *
 * Author: Daniel P. Berrange <berrange@redhat.com>
 */

#include <config.h>

#include <sys/types.h>
#include <sys/poll.h>
#include <limits.h>
#include <string.h>
#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <sys/utsname.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <signal.h>
#include <paths.h>
#include <pwd.h>
#include <stdio.h>
#include <sys/wait.h>
#include <sys/ioctl.h>
44
#include <net/if.h>
45

46
#include "virterror_internal.h"
47
#include "datatypes.h"
48
#include "bridge_driver.h"
49
#include "network_conf.h"
50
#include "device_conf.h"
51 52
#include "driver.h"
#include "buf.h"
53
#include "virpidfile.h"
54
#include "util.h"
55
#include "command.h"
56 57 58
#include "memory.h"
#include "uuid.h"
#include "iptables.h"
59
#include "logging.h"
60
#include "dnsmasq.h"
61
#include "configmake.h"
62
#include "virnetdev.h"
63
#include "pci.h"
64 65
#include "virnetdevbridge.h"
#include "virnetdevtap.h"
66
#include "virnetdevvportprofile.h"
67
#include "virdbus.h"
68
#include "virfile.h"
69

70 71
#define NETWORK_PID_DIR LOCALSTATEDIR "/run/libvirt/network"
#define NETWORK_STATE_DIR LOCALSTATEDIR "/lib/libvirt/network"
72

73
#define DNSMASQ_STATE_DIR LOCALSTATEDIR "/lib/libvirt/dnsmasq"
74
#define RADVD_STATE_DIR LOCALSTATEDIR "/lib/libvirt/radvd"
75

76 77
#define VIR_FROM_THIS VIR_FROM_NETWORK

78 79
/* Main driver state */
struct network_driver {
80
    virMutex lock;
81

82
    virNetworkObjList networks;
83 84 85 86 87 88 89

    iptablesContext *iptables;
    char *networkConfigDir;
    char *networkAutostartDir;
    char *logDir;
};

90 91 92

static void networkDriverLock(struct network_driver *driver)
{
93
    virMutexLock(&driver->lock);
94 95 96
}
static void networkDriverUnlock(struct network_driver *driver)
{
97
    virMutexUnlock(&driver->lock);
98 99
}

100 101
static int networkShutdown(void);

102 103 104 105 106 107 108
static int networkStartNetwork(struct network_driver *driver,
                               virNetworkObjPtr network);

static int networkShutdownNetwork(struct network_driver *driver,
                                  virNetworkObjPtr network);

static int networkStartNetworkVirtual(struct network_driver *driver,
109
                                     virNetworkObjPtr network);
110

111 112 113 114 115 116 117
static int networkShutdownNetworkVirtual(struct network_driver *driver,
                                        virNetworkObjPtr network);

static int networkStartNetworkExternal(struct network_driver *driver,
                                     virNetworkObjPtr network);

static int networkShutdownNetworkExternal(struct network_driver *driver,
118
                                        virNetworkObjPtr network);
119

120
static void networkReloadIptablesRules(struct network_driver *driver);
121
static void networkRefreshDaemons(struct network_driver *driver);
122

123 124
static struct network_driver *driverState = NULL;

125
static char *
126
networkDnsmasqLeaseFileNameDefault(const char *netname)
127 128 129
{
    char *leasefile;

130 131
    ignore_value(virAsprintf(&leasefile, DNSMASQ_STATE_DIR "/%s.leases",
                             netname));
132 133 134
    return leasefile;
}

135 136 137
networkDnsmasqLeaseFileNameFunc networkDnsmasqLeaseFileName =
    networkDnsmasqLeaseFileNameDefault;

138 139 140 141 142 143
/*
 * Build the base name ("<netname>-radvd") used for the radvd pidfile of
 * a network. Trivial, but kept in one place so that every user derives
 * the name the same way.
 *
 * Returns a newly allocated string owned by the caller; callers check
 * the result for NULL.
 */
static char *
networkRadvdPidfileBasename(const char *netname)
{
    char *basename;

    ignore_value(virAsprintf(&basename, "%s-radvd", netname));

    return basename;
}

static char *
networkRadvdConfigFileName(const char *netname)
{
    char *configfile;

153 154
    ignore_value(virAsprintf(&configfile, RADVD_STATE_DIR "/%s-radvd.conf",
                             netname));
155 156
    return configfile;
}
157

158 159 160
static char *
networkBridgeDummyNicName(const char *brname)
{
161
    static const char dummyNicSuffix[] = "-nic";
162 163
    char *nicname;

164 165 166 167 168 169 170
    if (strlen(brname) + sizeof(dummyNicSuffix) > IFNAMSIZ) {
        /* because the length of an ifname is limited to IFNAMSIZ-1
         * (usually 15), and we're adding 4 more characters, we must
         * truncate the original name to 11 to fit. In order to catch
         * a possible numeric ending (eg virbr0, virbr1, etc), we grab
         * the first 8 and last 3 characters of the string.
         */
171 172 173 174 175
        ignore_value(virAsprintf(&nicname, "%.*s%s%s",
                                 /* space for last 3 chars + "-nic" + NULL */
                                 (int)(IFNAMSIZ - (3 + sizeof(dummyNicSuffix))),
                                 brname, brname + strlen(brname) - 3,
                                 dummyNicSuffix));
176
    } else {
177
        ignore_value(virAsprintf(&nicname, "%s%s", brname, dummyNicSuffix));
178
    }
179 180 181
    return nicname;
}

182 183 184 185 186 187 188 189 190 191 192
/*
 * Re-detect the live state of one network object. The caller holds the
 * object's lock.
 *
 * If a readable state file exists under NETWORK_STATE_DIR it is parsed
 * and, on success, becomes the current definition while the previous
 * one is kept as newDef. If the network's bridge device exists, the
 * object is marked active and the pids of its dnsmasq/radvd daemons
 * are read back from their pidfiles (best effort).
 */
static void
networkProbeLiveState(virNetworkObjPtr obj)
{
    virNetworkDefPtr live;
    char *statefile;
    char *radvdbase;

    statefile = virNetworkConfigFile(NETWORK_STATE_DIR, obj->def->name);
    if (statefile == NULL)
        return;

    if (access(statefile, R_OK) < 0) {
        VIR_FREE(statefile);
        return;
    }

    /* Try and load the live config */
    live = virNetworkDefParseFile(statefile);
    VIR_FREE(statefile);
    if (live != NULL) {
        obj->newDef = obj->def;
        obj->def = live;
    }

    /* Only an existing bridge device makes the network active */
    if (obj->def->bridge == NULL ||
        virNetDevExists(obj->def->bridge) != 1)
        return;

    obj->active = 1;

    /* Daemons are only looked for when the network has IP addresses */
    if (obj->def->ips == NULL || !(obj->def->nips > 0))
        return;

    ignore_value(virPidFileReadIfAlive(NETWORK_PID_DIR, obj->def->name,
                                       &obj->dnsmasqPid, DNSMASQ));

    radvdbase = networkRadvdPidfileBasename(obj->def->name);
    if (radvdbase == NULL) {
        virReportOOMError();
        return;
    }

    ignore_value(virPidFileReadIfAlive(NETWORK_PID_DIR, radvdbase,
                                       &obj->radvdPid, RADVD));
    VIR_FREE(radvdbase);
}

/*
 * Walk every network known to @driver and refresh its live state,
 * locking each object for the duration of its probe.
 */
static void
networkFindActiveConfigs(struct network_driver *driver) {
    unsigned int idx;

    for (idx = 0; idx < driver->networks.count; idx++) {
        virNetworkObjPtr obj = driver->networks.objs[idx];

        virNetworkObjLock(obj);
        networkProbeLiveState(obj);
        virNetworkObjUnlock(obj);
    }
}


241 242 243
static void
networkAutostartConfigs(struct network_driver *driver) {
    unsigned int i;
244

245
    for (i = 0 ; i < driver->networks.count ; i++) {
246
        virNetworkObjLock(driver->networks.objs[i]);
247
        if (driver->networks.objs[i]->autostart &&
248 249
            !virNetworkObjIsActive(driver->networks.objs[i])) {
            if (networkStartNetwork(driver, driver->networks.objs[i]) < 0) {
250
            /* failed to start but already logged */
251
            }
252
        }
253
        virNetworkObjUnlock(driver->networks.objs[i]);
254 255 256
    }
}

257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275
/* Guarded with #ifdef (not #if) so that this definition is compiled
 * under exactly the same condition as the code in networkStartup()
 * that registers it. */
#ifdef HAVE_FIREWALLD
/*
 * D-Bus message filter installed by networkStartup().
 *
 * @message:   the incoming D-Bus message
 * @user_data: the struct network_driver passed at registration time
 *
 * Reloads the iptables rules of all networks when either a
 * NameOwnerChanged signal arrives on the D-Bus interface (firewalld
 * was started or stopped) or firewalld emits its Reloaded signal.
 *
 * Always returns DBUS_HANDLER_RESULT_NOT_YET_HANDLED so that other
 * filters still get to see the message.
 */
static DBusHandlerResult
firewalld_dbus_filter_bridge(DBusConnection *connection ATTRIBUTE_UNUSED,
                             DBusMessage *message, void *user_data) {
    struct network_driver *_driverState = user_data;

    if (dbus_message_is_signal(message, DBUS_INTERFACE_DBUS,
                               "NameOwnerChanged") ||
        dbus_message_is_signal(message, "org.fedoraproject.FirewallD1",
                               "Reloaded"))
    {
        VIR_DEBUG("Reload in bridge_driver because of firewalld.");
        networkReloadIptablesRules(_driverState);
    }

    return DBUS_HANDLER_RESULT_NOT_YET_HANDLED;
}
#endif

276 277 278 279 280 281
/**
 * networkStartup:
 *
 * Initialization function for the QEmu daemon
 */
static int
282
networkStartup(int privileged) {
283
    char *base = NULL;
284 285 286
#ifdef HAVE_FIREWALLD
    DBusConnection *sysbus = NULL;
#endif
287 288

    if (VIR_ALLOC(driverState) < 0)
289
        goto error;
290

291 292 293 294
    if (virMutexInit(&driverState->lock) < 0) {
        VIR_FREE(driverState);
        goto error;
    }
295 296
    networkDriverLock(driverState);

297
    if (privileged) {
298
        if (virAsprintf(&driverState->logDir,
299
                        "%s/log/libvirt/qemu", LOCALSTATEDIR) == -1)
300 301
            goto out_of_memory;

302
        if ((base = strdup (SYSCONFDIR "/libvirt")) == NULL)
303 304
            goto out_of_memory;
    } else {
305
        char *userdir = virGetUserCacheDirectory();
306 307 308

        if (!userdir)
            goto error;
309

310
        if (virAsprintf(&driverState->logDir,
311
                        "%s/qemu/log", userdir) == -1) {
312
            VIR_FREE(userdir);
313
            goto out_of_memory;
314
        }
315
        VIR_FREE(userdir);
316

317
        userdir = virGetUserConfigDirectory();
318
        if (virAsprintf(&base, "%s", userdir) == -1) {
319
            VIR_FREE(userdir);
320 321
            goto out_of_memory;
        }
322
        VIR_FREE(userdir);
323 324 325 326 327
    }

    /* Configuration paths are either ~/.libvirt/qemu/... (session) or
     * /etc/libvirt/qemu/... (system).
     */
328
    if (virAsprintf(&driverState->networkConfigDir, "%s/qemu/networks", base) == -1)
329 330
        goto out_of_memory;

331 332
    if (virAsprintf(&driverState->networkAutostartDir, "%s/qemu/networks/autostart",
                    base) == -1)
333 334 335 336
        goto out_of_memory;

    VIR_FREE(base);

337
    if (!(driverState->iptables = iptablesContextNew())) {
338
        goto out_of_memory;
339 340 341
    }


342
    if (virNetworkLoadAllConfigs(&driverState->networks,
343
                                 driverState->networkConfigDir,
344 345 346
                                 driverState->networkAutostartDir) < 0)
        goto error;

347
    networkFindActiveConfigs(driverState);
348
    networkReloadIptablesRules(driverState);
349
    networkRefreshDaemons(driverState);
350 351
    networkAutostartConfigs(driverState);

352 353
    networkDriverUnlock(driverState);

354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379
#ifdef HAVE_FIREWALLD
    if (!(sysbus = virDBusGetSystemBus())) {
        virErrorPtr err = virGetLastError();
        VIR_WARN("DBus not available, disabling firewalld support "
                 "in bridge_driver: %s", err->message);
    } else {
        /* add matches for
         * NameOwnerChanged on org.freedesktop.DBus for firewalld start/stop
         * Reloaded on org.fedoraproject.FirewallD1 for firewalld reload
         */
        dbus_bus_add_match(sysbus,
                           "type='signal'"
                           ",interface='"DBUS_INTERFACE_DBUS"'"
                           ",member='NameOwnerChanged'"
                           ",arg0='org.fedoraproject.FirewallD1'",
                           NULL);
        dbus_bus_add_match(sysbus,
                           "type='signal'"
                           ",interface='org.fedoraproject.FirewallD1'"
                           ",member='Reloaded'",
                           NULL);
        dbus_connection_add_filter(sysbus, firewalld_dbus_filter_bridge,
                                   driverState, NULL);
    }
#endif

380 381
    return 0;

382
out_of_memory:
383
    virReportOOMError();
384 385

error:
386 387 388
    if (driverState)
        networkDriverUnlock(driverState);

389
    VIR_FREE(base);
390
    networkShutdown();
391 392 393 394 395 396 397 398 399 400 401
    return -1;
}

/**
 * networkReload:
 *
 * Function to restart the QEmu daemon, it will recheck the configuration
 * files and update its state and the networking
 */
static int
networkReload(void) {
402 403 404
    if (!driverState)
        return 0;

405
    networkDriverLock(driverState);
406
    virNetworkLoadAllConfigs(&driverState->networks,
407 408
                             driverState->networkConfigDir,
                             driverState->networkAutostartDir);
409
    networkReloadIptablesRules(driverState);
410
    networkRefreshDaemons(driverState);
411
    networkAutostartConfigs(driverState);
412
    networkDriverUnlock(driverState);
413 414 415 416 417 418 419 420 421 422 423 424 425
    return 0;
}

/**
 * networkActive:
 *
 * Checks if the QEmu daemon is active, i.e. has an active domain or
 * an active network
 *
 * Returns 1 if active, 0 otherwise
 */
static int
networkActive(void) {
426
    unsigned int i;
427
    int active = 0;
428

429 430 431
    if (!driverState)
        return 0;

432
    networkDriverLock(driverState);
433 434
    for (i = 0 ; i < driverState->networks.count ; i++) {
        virNetworkObjPtr net = driverState->networks.objs[i];
435
        virNetworkObjLock(net);
D
Daniel P. Berrange 已提交
436
        if (virNetworkObjIsActive(net))
437
            active = 1;
438
        virNetworkObjUnlock(net);
439
    }
440
    networkDriverUnlock(driverState);
441
    return active;
442 443 444 445 446 447 448 449 450 451 452 453
}

/**
 * networkShutdown:
 *
 * Shutdown the QEmu daemon, it will stop all active domains and networks
 */
static int
networkShutdown(void) {
    if (!driverState)
        return -1;

454 455
    networkDriverLock(driverState);

456
    /* free inactive networks */
457
    virNetworkObjListFree(&driverState->networks);
458 459 460 461 462 463 464 465

    VIR_FREE(driverState->logDir);
    VIR_FREE(driverState->networkConfigDir);
    VIR_FREE(driverState->networkAutostartDir);

    if (driverState->iptables)
        iptablesContextFree(driverState->iptables);

466
    networkDriverUnlock(driverState);
467
    virMutexDestroy(&driverState->lock);
468

469 470 471 472 473 474
    VIR_FREE(driverState);

    return 0;
}


475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533
/* networkKillDaemon:
 * @pid:         process to terminate
 * @daemonName:  daemon name, used in log messages only
 * @networkName: name of the owning network, used in log messages only
 *
 * kill the specified pid/name, and wait a bit to make sure it's dead.
 *
 * Returns 0 once the process is gone (kill() reports ESRCH), -1 if
 * signalling failed for another reason or the process is still there
 * after the full wait. Both failure cases are logged as warnings.
 */
static int
networkKillDaemon(pid_t pid, const char *daemonName, const char *networkName)
{
    /* The schedule documented below needs 200ms polls: 15 polls give
     * 3 seconds after SIGTERM, the remaining 10 give 2 seconds after
     * SIGKILL. (A 20ms poll would shrink both waits tenfold and send
     * SIGKILL after only 0.3 seconds.) */
    enum {
        KILL_POLL_MSECS = 200,   /* pause between two probes */
        KILL_TERM_POLLS = 15,    /* probes before escalating to SIGKILL */
        KILL_TOTAL_POLLS = 25    /* probes before giving up */
    };
    int ii, ret = -1;
    const char *signame = "TERM";

    /* send SIGTERM, then wait up to 3 seconds for the process to
     * disappear, send SIGKILL, then wait for up to another 2
     * seconds. If that fails, log a warning and continue, hoping
     * for the best.
     */
    for (ii = 0; ii < KILL_TOTAL_POLLS; ii++) {
        /* signal 0 only probes whether the process still exists */
        int signum = 0;
        if (ii == 0)
            signum = SIGTERM;
        else if (ii == KILL_TERM_POLLS) {
            signum = SIGKILL;
            signame = "KILL";
        }
        if (kill(pid, signum) < 0) {
            if (errno == ESRCH) {
                /* no such process any more: that is what we wanted */
                ret = 0;
            } else {
                char ebuf[1024];
                VIR_WARN("Failed to terminate %s process %d "
                         "for network '%s' with SIG%s: %s",
                         daemonName, pid, networkName, signame,
                         virStrerror(errno, ebuf, sizeof(ebuf)));
            }
            goto cleanup;
        }
        /* NB: since networks have no reference count like
         * domains, there is no safe way to unlock the network
         * object temporarily, and so we can't follow the
         * procedure used by the qemu driver of 1) unlock driver
         * 2) sleep, 3) add ref to object 4) unlock object, 5)
         * re-lock driver, 6) re-lock object. We may need to add
         * that functionality eventually, but for now this
         * function is rarely used and, at worst, leaving the
         * network driver locked during this loop of sleeps will
         * have the effect of holding up any other thread trying
         * to make modifications to a network for up to 5 seconds;
         * since modifications to networks are much less common
         * than modifications to domains, this seems a reasonable
         * tradeoff in exchange for less code disruption.
         */
        usleep(KILL_POLL_MSECS * 1000);
    }
    VIR_WARN("Timed out waiting after SIG%s to %s process %d "
             "(network '%s')",
             signame, daemonName, pid, networkName);
cleanup:
    return ret;
}

534 535 536 537
/*
 * Fill the dnsmasq context @dctx with host entries.
 *
 * @ipdef:  IP definition whose static DHCP hosts are added; entries
 *          lacking a MAC address or a valid IP address are skipped
 * @dnsdef: optional DNS definition (may be NULL); every name of every
 *          host with a valid IP address is added
 *
 * Returns 0 on success, -1 as soon as adding an entry fails.
 */
static int
networkBuildDnsmasqHostsfile(dnsmasqContext *dctx,
                             virNetworkIpDefPtr ipdef,
                             virNetworkDNSDefPtr dnsdef)
{
    unsigned int h, n;

    for (h = 0; h < ipdef->nhosts; h++) {
        virNetworkDHCPHostDefPtr dhcphost = &(ipdef->hosts[h]);

        if (!(dhcphost->mac) || !(VIR_SOCKET_ADDR_VALID(&dhcphost->ip)))
            continue;

        if (dnsmasqAddDhcpHost(dctx, dhcphost->mac, &dhcphost->ip,
                               dhcphost->name) < 0)
            return -1;
    }

    if (!dnsdef)
        return 0;

    for (h = 0; h < dnsdef->nhosts; h++) {
        virNetworkDNSHostsDefPtr dnshost = &(dnsdef->hosts[h]);

        if (!(VIR_SOCKET_ADDR_VALID(&dnshost->ip)))
            continue;

        for (n = 0; n < dnshost->nnames; n++) {
            if (dnsmasqAddHost(dctx, &dnshost->ip, dnshost->names[n]) < 0)
                return -1;
        }
    }

    return 0;
}


563
static int
564
networkBuildDnsmasqArgv(virNetworkObjPtr network,
565
                        virNetworkIpDefPtr ipdef,
566
                        const char *pidfile,
567 568 569
                        virCommandPtr cmd,
                        dnsmasqContext *dctx)
{
570
    int r, ret = -1;
571
    int nbleases = 0;
572
    int ii;
573 574 575 576
    char *record = NULL;
    char *recordPort = NULL;
    char *recordWeight = NULL;
    char *recordPriority = NULL;
577
    virNetworkIpDefPtr tmpipdef;
578 579

    /*
580
     * NB, be careful about syntax for dnsmasq options in long format.
581 582 583 584 585 586 587 588 589 590 591 592 593
     *
     * If the flag has a mandatory argument, it can be given using
     * either syntax:
     *
     *     --foo bar
     *     --foo=bar
     *
     * If the flag has a optional argument, it *must* be given using
     * the syntax:
     *
     *     --foo=bar
     *
     * It is hard to determine whether a flag is optional or not,
594 595
     * without reading the dnsmasq source :-( The manpage is not
     * very explicit on this.
596
     */
597 598 599 600 601

    /*
     * Needed to ensure dnsmasq uses same algorithm for processing
     * multiple namedriver entries in /etc/resolv.conf as GLibC.
     */
602
    virCommandAddArgList(cmd, "--strict-order", "--bind-interfaces", NULL);
603

604
    if (network->def->domain)
605 606 607 608
        virCommandAddArgPair(cmd, "--domain", network->def->domain);
    /* need to specify local even if no domain specified */
    virCommandAddArgFormat(cmd, "--local=/%s/",
                           network->def->domain ? network->def->domain : "");
609
    virCommandAddArg(cmd, "--domain-needed");
610

611 612
    if (pidfile)
        virCommandAddArgPair(cmd, "--pid-file", pidfile);
613

614
    /* *no* conf file */
615
    virCommandAddArg(cmd, "--conf-file=");
616

617 618 619
    virCommandAddArgList(cmd,
                         "--except-interface", "lo",
                         NULL);
620

621 622
    /* If this is an isolated network, set the default route option
     * (3) to be empty to avoid setting a default route that's
623 624 625 626
     * guaranteed to not work, and set --no-resolv so that no dns
     * requests are forwarded on to the dns server listed in the
     * host's /etc/resolv.conf (since this could be used as a channel
     * to build a connection to the outside).
627
     */
628 629 630 631
    if (network->def->forwardType == VIR_NETWORK_FORWARD_NONE) {
        virCommandAddArgList(cmd, "--dhcp-option=3",
                             "--no-resolv", NULL);
    }
632

633 634 635 636 637
    if (network->def->dns != NULL) {
        virNetworkDNSDefPtr dns = network->def->dns;
        int i;

        for (i = 0; i < dns->ntxtrecords; i++) {
E
Eric Blake 已提交
638
            virCommandAddArgFormat(cmd, "--txt-record=%s,%s",
639 640
                                   dns->txtrecords[i].name,
                                   dns->txtrecords[i].value);
641
        }
642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677

        for (i = 0; i < dns->nsrvrecords; i++) {
            if (dns->srvrecords[i].service && dns->srvrecords[i].protocol) {
                if (dns->srvrecords[i].port) {
                    if (virAsprintf(&recordPort, "%d", dns->srvrecords[i].port) < 0) {
                        virReportOOMError();
                        goto cleanup;
                    }
                }
                if (dns->srvrecords[i].priority) {
                    if (virAsprintf(&recordPriority, "%d", dns->srvrecords[i].priority) < 0) {
                        virReportOOMError();
                        goto cleanup;
                    }
                }
                if (dns->srvrecords[i].weight) {
                    if (virAsprintf(&recordWeight, "%d", dns->srvrecords[i].weight) < 0) {
                        virReportOOMError();
                        goto cleanup;
                    }
                }

                if (virAsprintf(&record, "%s.%s.%s,%s,%s,%s,%s",
                                dns->srvrecords[i].service,
                                dns->srvrecords[i].protocol,
                                dns->srvrecords[i].domain   ? dns->srvrecords[i].domain : "",
                                dns->srvrecords[i].target   ? dns->srvrecords[i].target : "",
                                recordPort                  ? recordPort                : "",
                                recordPriority              ? recordPriority            : "",
                                recordWeight                ? recordWeight              : "") < 0) {
                    virReportOOMError();
                    goto cleanup;
                }

                virCommandAddArgPair(cmd, "--srv-host", record);
                VIR_FREE(record);
678 679 680
                VIR_FREE(recordPort);
                VIR_FREE(recordWeight);
                VIR_FREE(recordPriority);
681 682
            }
        }
683 684
    }

685 686 687 688 689 690 691 692 693 694 695
    /*
     * --interface does not actually work with dnsmasq < 2.47,
     * due to DAD for ipv6 addresses on the interface.
     *
     * virCommandAddArgList(cmd, "--interface", ipdef->bridge, NULL);
     *
     * So listen on all defined IPv[46] addresses
     */
    for (ii = 0;
         (tmpipdef = virNetworkDefGetIpByIndex(network->def, AF_UNSPEC, ii));
         ii++) {
696
        char *ipaddr = virSocketAddrFormat(&tmpipdef->address);
697 698 699 700 701 702
        if (!ipaddr)
            goto cleanup;
        virCommandAddArgList(cmd, "--listen-address", ipaddr, NULL);
        VIR_FREE(ipaddr);
    }

703
    if (ipdef) {
704
        for (r = 0 ; r < ipdef->nranges ; r++) {
705
            char *saddr = virSocketAddrFormat(&ipdef->ranges[r].start);
706 707
            if (!saddr)
                goto cleanup;
708
            char *eaddr = virSocketAddrFormat(&ipdef->ranges[r].end);
709 710 711 712 713 714
            if (!eaddr) {
                VIR_FREE(saddr);
                goto cleanup;
            }
            virCommandAddArg(cmd, "--dhcp-range");
            virCommandAddArgFormat(cmd, "%s,%s", saddr, eaddr);
715
            VIR_FREE(saddr);
716
            VIR_FREE(eaddr);
717 718
            nbleases += virSocketAddrGetRange(&ipdef->ranges[r].start,
                                              &ipdef->ranges[r].end);
719
        }
720

721 722 723 724 725 726
        /*
         * For static-only DHCP, i.e. with no range but at least one host element,
         * we have to add a special --dhcp-range option to enable the service in
         * dnsmasq.
         */
        if (!ipdef->nranges && ipdef->nhosts) {
727
            char *bridgeaddr = virSocketAddrFormat(&ipdef->address);
728 729 730 731 732 733
            if (!bridgeaddr)
                goto cleanup;
            virCommandAddArg(cmd, "--dhcp-range");
            virCommandAddArgFormat(cmd, "%s,static", bridgeaddr);
            VIR_FREE(bridgeaddr);
        }
734

735
        if (ipdef->nranges > 0) {
736 737 738 739 740
            char *leasefile = networkDnsmasqLeaseFileName(network->def->name);
            if (!leasefile)
                goto cleanup;
            virCommandAddArgFormat(cmd, "--dhcp-leasefile=%s", leasefile);
            VIR_FREE(leasefile);
741 742
            virCommandAddArgFormat(cmd, "--dhcp-lease-max=%d", nbleases);
        }
743

744 745
        if (ipdef->nranges || ipdef->nhosts)
            virCommandAddArg(cmd, "--dhcp-no-override");
746

747 748 749 750
        /* add domain to any non-qualified hostnames in /etc/hosts or addn-hosts */
        if (network->def->domain)
           virCommandAddArg(cmd, "--expand-hosts");

751 752 753
        if (networkBuildDnsmasqHostsfile(dctx, ipdef, network->def->dns) < 0)
            goto cleanup;

754 755 756 757 758
        /* Even if there are currently no static hosts, if we're
         * listening for DHCP, we should write a 0-length hosts
         * file to allow for runtime additions.
         */
        if (ipdef->nranges || ipdef->nhosts)
759 760
            virCommandAddArgPair(cmd, "--dhcp-hostsfile",
                                 dctx->hostsfile->path);
761 762 763 764 765 766

        /* Likewise, always create this file and put it on the commandline, to allow for
         * for runtime additions.
         */
        virCommandAddArgPair(cmd, "--addn-hosts",
                             dctx->addnhostsfile->path);
767

768 769 770 771 772 773 774
        if (ipdef->tftproot) {
            virCommandAddArgList(cmd, "--enable-tftp",
                                 "--tftp-root", ipdef->tftproot,
                                 NULL);
        }
        if (ipdef->bootfile) {
            virCommandAddArg(cmd, "--dhcp-boot");
775 776
            if (VIR_SOCKET_ADDR_VALID(&ipdef->bootserver)) {
                char *bootserver = virSocketAddrFormat(&ipdef->bootserver);
777

778 779 780 781 782 783 784 785
                if (!bootserver)
                    goto cleanup;
                virCommandAddArgFormat(cmd, "%s%s%s",
                                       ipdef->bootfile, ",,", bootserver);
                VIR_FREE(bootserver);
            } else {
                virCommandAddArg(cmd, ipdef->bootfile);
            }
786
        }
787 788
    }

789 790
    ret = 0;
cleanup:
791 792 793 794
    VIR_FREE(record);
    VIR_FREE(recordPort);
    VIR_FREE(recordWeight);
    VIR_FREE(recordPriority);
795
    return ret;
796 797
}

798 799
/*
 * Build the dnsmasq command for @network.
 *
 * @cmdout:  if non-NULL, receives the newly built command on success;
 *           the caller then owns it and must release it with
 *           virCommandFree(). Left untouched when no command is built.
 * @pidfile: pidfile path handed to dnsmasq, or NULL
 * @dctx:    dnsmasq context filled with the network's host entries
 *
 * As a side effect network->dnsmasqPid is reset to -1.
 *
 * Returns 0 on success, including the case where the network has no IP
 * address at all and therefore no command is built, -1 on failure.
 */
int
networkBuildDhcpDaemonCommandLine(virNetworkObjPtr network, virCommandPtr *cmdout,
                                  char *pidfile, dnsmasqContext *dctx)
{
    virCommandPtr cmd = NULL;
    int ret = -1, ii;
    virNetworkIpDefPtr ipdef;

    network->dnsmasqPid = -1;

    /* Look for first IPv4 address that has dhcp defined. */
    /* We support dhcp config on 1 IPv4 interface only. */
    for (ii = 0;
         (ipdef = virNetworkDefGetIpByIndex(network->def, AF_INET, ii));
         ii++) {
        if (ipdef->nranges || ipdef->nhosts)
            break;
    }
    /* If no IPv4 addresses had dhcp info, pick the first (if there were any). */
    if (!ipdef)
        ipdef = virNetworkDefGetIpByIndex(network->def, AF_INET, 0);

    /* If there are no IP addresses at all (v4 or v6), return now, since
     * there won't be any address for dnsmasq to listen on anyway.
     * If there are any addresses, even if no dhcp ranges or static entries,
     * we should continue and run dnsmasq, just for the DNS capabilities.
     */
    if (!virNetworkDefGetIpByIndex(network->def, AF_UNSPEC, 0))
        return 0;

    cmd = virCommandNew(DNSMASQ);
    if (networkBuildDnsmasqArgv(network, ipdef, pidfile, cmd, dctx) < 0) {
        goto cleanup;
    }

    if (cmdout) {
        /* ownership of the command moves to the caller */
        *cmdout = cmd;
        cmd = NULL;
    }
    ret = 0;
cleanup:
    /* Frees the command on failure and also on success when the caller
     * passed no @cmdout to receive it (it would leak otherwise); this
     * is a no-op once ownership has been handed over. */
    virCommandFree(cmd);
    return ret;
}

/*
 * Launch dnsmasq for @network and record its pid in
 * network->dnsmasqPid.
 *
 * Creates the pid/state directories as needed, writes the dnsmasq
 * host files, runs the command and reads the pid back from the
 * pidfile.
 *
 * Returns 0 on success (also when the network has no IP address and
 * nothing needs to be started), a negative value on failure.
 */
static int
networkStartDhcpDaemon(virNetworkObjPtr network)
{
    dnsmasqContext *dctx = NULL;
    virCommandPtr cmd = NULL;
    char *pidfile = NULL;
    int ret = -1;

    /* without any IP address there is nothing for dnsmasq to listen on */
    if (!virNetworkDefGetIpByIndex(network->def, AF_UNSPEC, 0)) {
        ret = 0;
        goto cleanup;
    }

    if (virFileMakePath(NETWORK_PID_DIR) < 0) {
        virReportSystemError(errno,
                             _("cannot create directory %s"),
                             NETWORK_PID_DIR);
        goto cleanup;
    }

    if (virFileMakePath(NETWORK_STATE_DIR) < 0) {
        virReportSystemError(errno,
                             _("cannot create directory %s"),
                             NETWORK_STATE_DIR);
        goto cleanup;
    }

    pidfile = virPidFileBuildPath(NETWORK_PID_DIR, network->def->name);
    if (pidfile == NULL) {
        virReportOOMError();
        goto cleanup;
    }

    if (virFileMakePath(DNSMASQ_STATE_DIR) < 0) {
        virReportSystemError(errno,
                             _("cannot create directory %s"),
                             DNSMASQ_STATE_DIR);
        goto cleanup;
    }

    dctx = dnsmasqContextNew(network->def->name, DNSMASQ_STATE_DIR);
    if (dctx == NULL)
        goto cleanup;

    if ((ret = networkBuildDhcpDaemonCommandLine(network, &cmd,
                                                 pidfile, dctx)) < 0)
        goto cleanup;

    if ((ret = dnsmasqSave(dctx)) < 0)
        goto cleanup;

    if ((ret = virCommandRun(cmd, NULL)) < 0)
        goto cleanup;

    /*
     * There really is no race here - when dnsmasq daemonizes, its
     * leader process stays around until its child has actually
     * written its pidfile. So by the time virCommandRun exits it has
     * waitpid'd and guaranteed the process has started and written a
     * pid
     */
    if ((ret = virPidFileRead(NETWORK_PID_DIR, network->def->name,
                              &network->dnsmasqPid)) < 0)
        goto cleanup;

    ret = 0;

cleanup:
    VIR_FREE(pidfile);
    virCommandFree(cmd);
    dnsmasqContextFree(dctx);
    return ret;
}

919 920 921 922 923 924
/* networkRefreshDhcpDaemon:
 *  Update dnsmasq config files, then send a SIGHUP so that it rereads
 *  them.
 *
 *  Returns 0 on success, -1 on failure.
 */
925
static int
926
networkRefreshDhcpDaemon(virNetworkObjPtr network)
927
{
928
    int ret = -1, ii;
929
    virNetworkIpDefPtr ipdef;
930
    dnsmasqContext *dctx = NULL;
931

932 933 934
    /* if there's no running dnsmasq, just start it */
    if (network->dnsmasqPid <= 0 || (kill(network->dnsmasqPid, 0) < 0))
        return networkStartDhcpDaemon(network);
935

936 937 938 939 940 941 942
    /* Look for first IPv4 address that has dhcp defined. */
    /* We support dhcp config on 1 IPv4 interface only. */
    for (ii = 0;
         (ipdef = virNetworkDefGetIpByIndex(network->def, AF_INET, ii));
         ii++) {
        if (ipdef->nranges || ipdef->nhosts)
            break;
943
    }
944 945 946
    /* If no IPv4 addresses had dhcp info, pick the first (if there were any). */
    if (!ipdef)
        ipdef = virNetworkDefGetIpByIndex(network->def, AF_INET, 0);
947

948 949 950
    if (!ipdef) {
        /* no <ip> elements, so nothing to do */
        return 0;
951 952
    }

953
    if (!(dctx = dnsmasqContextNew(network->def->name, DNSMASQ_STATE_DIR)))
954
        goto cleanup;
955 956 957 958 959

    if (networkBuildDnsmasqHostsfile(dctx, ipdef, network->def->dns) < 0)
       goto cleanup;

    if ((ret = dnsmasqSave(dctx)) < 0)
960
        goto cleanup;
961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982

    ret = kill(network->dnsmasqPid, SIGHUP);
cleanup:
    dnsmasqContextFree(dctx);
    return ret;
}

/* networkRestartDhcpDaemon:
 *
 * Stop the dnsmasq recorded for @network (if there is one) and start
 * it again, so that configuration passed on the dnsmasq command line
 * (as well as any kept in separate config files) is picked up.
 *
 * Returns whatever networkStartDhcpDaemon() returns.
 */
static int
networkRestartDhcpDaemon(virNetworkObjPtr network)
{
    bool haveDaemon = network->dnsmasqPid > 0;

    if (haveDaemon) {
        /* the result of the kill is deliberately not checked here */
        networkKillDaemon(network->dnsmasqPid, "dnsmasq",
                          network->def->name);
        network->dnsmasqPid = -1;
    }

    /* the start routine decides whether dnsmasq is needed at all */
    return networkStartDhcpDaemon(network);
}

/* networkRadvdConfContents:
 *
 * Format a radvd configuration for @network: one "interface" section
 * for the bridge, holding one "prefix" stanza per IPv6 address of the
 * network.
 *
 * On success *configstr is the formatted text, or NULL when the
 * network has no IPv6 address.  On failure *configstr is NULL.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
networkRadvdConfContents(virNetworkObjPtr network, char **configstr)
{
    virBuffer buf = VIR_BUFFER_INITIALIZER;
    virNetworkIpDefPtr v6def;
    int nv6 = 0;
    int rc = -1;

    *configstr = NULL;

    /* section header; IgnoreIfMissing allows radvd to start even
     * when the bridge is down
     */
    virBufferAsprintf(&buf, "interface %s\n"
                      "{\n"
                      "  AdvSendAdvert on;\n"
                      "  AdvManagedFlag off;\n"
                      "  AdvOtherConfigFlag off;\n"
                      "  IgnoreIfMissing on;\n"
                      "\n",
                      network->def->bridge);

    /* nv6 doubles as the lookup index and the count of stanzas written */
    while ((v6def = virNetworkDefGetIpByIndex(network->def, AF_INET6, nv6))) {
        char *addrStr;
        int plen = virNetworkIpDefPrefix(v6def);

        if (plen < 0) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("bridge '%s' has an invalid prefix"),
                           network->def->bridge);
            goto cleanup;
        }

        addrStr = virSocketAddrFormat(&v6def->address);
        if (addrStr == NULL)
            goto cleanup;

        virBufferAsprintf(&buf,
                          "  prefix %s/%d\n"
                          "  {\n"
                          "    AdvOnLink on;\n"
                          "    AdvAutonomous on;\n"
                          "    AdvRouterAddr off;\n"
                          "  };\n",
                          addrStr, plen);
        VIR_FREE(addrStr);
        nv6++;
    }

    /* hand out a string only if at least one IPv6 address was found */
    if (nv6 > 0) {
        virBufferAddLit(&buf, "};\n");

        if (virBufferError(&buf) ||
            !(*configstr = virBufferContentAndReset(&buf))) {
            virReportOOMError();
            goto cleanup;
        }
    }

    rc = 0;

cleanup:
    virBufferFreeAndReset(&buf);
    return rc;
}

/* networkRadvdConfWrite:
 *
 * Generate the radvd configuration for @network and write it to the
 * file named by networkRadvdConfigFileName(), with mode 0600.
 *
 * @configFile may be NULL.  When it is not, *configFile receives the
 * file name, which the caller must free; it stays NULL when there was
 * no configuration to write (no IPv6 address).  Note that after a
 * failed write the name is still left in *configFile.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
networkRadvdConfWrite(virNetworkObjPtr network, char **configFile)
{
    char *contents = NULL;
    char *scratchName = NULL;
    /* callers not interested in the name get a private slot that is
     * released before returning
     */
    char **nameOut = configFile ? configFile : &scratchName;
    int rc = -1;

    *nameOut = NULL;

    if (networkRadvdConfContents(network, &contents) < 0)
        goto cleanup;

    /* NULL contents means there is nothing to write, which is fine */
    if (contents != NULL) {
        *nameOut = networkRadvdConfigFileName(network->def->name);
        if (*nameOut == NULL) {
            virReportOOMError();
            goto cleanup;
        }

        if (virFileWriteStr(*nameOut, contents, 0600) < 0) {
            virReportSystemError(errno,
                                 _("couldn't write radvd config file '%s'"),
                                 *nameOut);
            goto cleanup;
        }
    }

    rc = 0;

cleanup:
    VIR_FREE(contents);
    VIR_FREE(scratchName);
    return rc;
}

/* networkStartRadvd:
 *
 * Start a radvd process for @network and record its pid in
 * network->radvdPid.  Networks without any IPv6 address do not get a
 * radvd; in that case radvdPid is left at -1 and 0 is returned.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
networkStartRadvd(virNetworkObjPtr network)
{
    const char *neededDirs[] = { NETWORK_PID_DIR, RADVD_STATE_DIR };
    unsigned int d;
    char *pidBase = NULL;
    char *pidPath = NULL;
    char *confPath = NULL;
    virCommandPtr radvdCmd = NULL;
    int rc = -1;

    network->radvdPid = -1;

    /* no IPv6 addresses, so radvd is not needed */
    if (!virNetworkDefGetIpByIndex(network->def, AF_INET6, 0)) {
        rc = 0;
        goto cleanup;
    }

    if (!virFileIsExecutable(RADVD)) {
        virReportSystemError(errno,
                             _("Cannot find %s - "
                               "Possibly the package isn't installed"),
                             RADVD);
        goto cleanup;
    }

    /* make sure the pid directory and the radvd state directory exist */
    for (d = 0; d < sizeof(neededDirs) / sizeof(neededDirs[0]); d++) {
        if (virFileMakePath(neededDirs[d]) < 0) {
            virReportSystemError(errno,
                                 _("cannot create directory %s"),
                                 neededDirs[d]);
            goto cleanup;
        }
    }

    /* pidfile name: per-network base name placed in the pid directory */
    if (!(pidBase = networkRadvdPidfileBasename(network->def->name)) ||
        !(pidPath = virPidFileBuildPath(NETWORK_PID_DIR, pidBase))) {
        virReportOOMError();
        goto cleanup;
    }

    if (networkRadvdConfWrite(network, &confPath) < 0)
        goto cleanup;

    /* "--debug 1" keeps radvd from daemonizing itself; virCommand does
     * the daemonizing and writes the pidfile that is read back below.
     * radvd's own daemonization and pidfile creation race with that
     * read, which would then fail.  radvd cannot be told to skip its
     * own pidfile, so it is pointed at a "<pidfile>-bin" name that is
     * never used - harmless.
     */
    radvdCmd = virCommandNewArgList(RADVD, "--debug", "1",
                                    "--config", confPath,
                                    "--pidfile", NULL);
    virCommandAddArgFormat(radvdCmd, "%s-bin", pidPath);

    virCommandSetPidFile(radvdCmd, pidPath);
    virCommandDaemonize(radvdCmd);

    if (virCommandRun(radvdCmd, NULL) < 0)
        goto cleanup;

    if (virPidFileRead(NETWORK_PID_DIR, pidBase, &network->radvdPid) < 0)
        goto cleanup;

    rc = 0;

cleanup:
    virCommandFree(radvdCmd);
    VIR_FREE(confPath);
    VIR_FREE(pidBase);
    VIR_FREE(pidPath);
    return rc;
}

1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199
/* networkRefreshRadvd:
 *
 * Rewrite the radvd configuration of @network and send the running
 * radvd a SIGHUP.  When no radvd is recorded for the network, or the
 * recorded pid does not answer a kill(pid, 0) probe, radvd is started
 * via networkStartRadvd() instead.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
networkRefreshRadvd(virNetworkObjPtr network)
{
    bool running = network->radvdPid > 0 &&
                   kill(network->radvdPid, 0) >= 0;

    if (!running)
        return networkStartRadvd(network);

    /* no IPv6 addresses, so there is nothing for radvd to advertise */
    if (virNetworkDefGetIpByIndex(network->def, AF_INET6, 0) == NULL)
        return 0;

    /* the file name is of no interest here, hence the NULL */
    if (networkRadvdConfWrite(network, NULL) < 0)
        return -1;

    return kill(network->radvdPid, SIGHUP);
}

1200 1201
#if 0
/* currently unused, so it causes a build error unless we #if it out */

/* networkRestartRadvd:
 *
 * Kill the radvd recorded for @network (if there is one), delete its
 * pidfile, and start radvd again.
 *
 * Returns whatever networkStartRadvd() returns.
 */
static int
networkRestartRadvd(virNetworkObjPtr network)
{
    char *radvdpidbase;

    /* if there is a running radvd, kill it */
    if (network->radvdPid > 0) {
        /* essentially ignore errors from the following two functions,
         * since there's really no better recovery to be done than to
         * just push ahead (and that may be exactly what's needed).
         *
         * The pid handed to networkKillDaemon() must be radvd's own;
         * passing dnsmasqPid here would kill the wrong daemon.
         */
        if ((networkKillDaemon(network->radvdPid, "radvd",
                               network->def->name) >= 0) &&
            ((radvdpidbase = networkRadvdPidfileBasename(network->def->name))
             != NULL)) {
            virPidFileDelete(NETWORK_PID_DIR, radvdpidbase);
            VIR_FREE(radvdpidbase);
        }
        network->radvdPid = -1;
    }
    /* now start radvd if it should be started */
    return networkStartRadvd(network);
}
#endif /* #if 0 */

1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257
/* networkRefreshDaemons:
 *
 * Walk every network known to @driver and refresh its dnsmasq and
 * radvd daemons: a daemon that is still there gets a SIGHUP, one that
 * has disappeared is started again.  Meant to be called when libvirtd
 * is restarted.  Results of the individual refreshes are ignored.
 */
static void
networkRefreshDaemons(struct network_driver *driver)
{
    unsigned int idx;

    VIR_INFO("Refreshing network daemons");

    for (idx = 0; idx < driver->networks.count; idx++) {
        virNetworkObjPtr net = driver->networks.objs[idx];

        virNetworkObjLock(net);

        if (virNetworkObjIsActive(net)) {
            /* Only the three L3 network types that libvirt itself
             * configures can have a dnsmasq or radvd attached.
             */
            switch (net->def->forwardType) {
            case VIR_NETWORK_FORWARD_NONE:
            case VIR_NETWORK_FORWARD_NAT:
            case VIR_NETWORK_FORWARD_ROUTE:
                networkRefreshDhcpDaemon(net);
                networkRefreshRadvd(net);
                break;
            default:
                break;
            }
        }

        virNetworkObjUnlock(net);
    }
}

1258
static int
1259
networkAddMasqueradingIptablesRules(struct network_driver *driver,
1260 1261
                                    virNetworkObjPtr network,
                                    virNetworkIpDefPtr ipdef)
1262 1263
{
    int prefix = virNetworkIpDefPrefix(ipdef);
1264
    const char *forwardIf = virNetworkDefForwardIf(network->def, 0);
1265 1266

    if (prefix < 0) {
1267 1268 1269
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Invalid prefix or netmask for '%s'"),
                       network->def->bridge);
1270 1271
        goto masqerr1;
    }
1272

1273
    /* allow forwarding packets from the bridge interface */
1274
    if (iptablesAddForwardAllowOut(driver->iptables,
1275
                                   &ipdef->address,
1276
                                   prefix,
1277
                                   network->def->bridge,
1278
                                   forwardIf) < 0) {
1279 1280 1281
        virReportError(VIR_ERR_SYSTEM_ERROR,
                       _("failed to add iptables rule to allow forwarding from '%s'"),
                       network->def->bridge);
1282 1283 1284
        goto masqerr1;
    }

1285 1286 1287
    /* allow forwarding packets to the bridge interface if they are
     * part of an existing connection
     */
1288
    if (iptablesAddForwardAllowRelatedIn(driver->iptables,
1289
                                         &ipdef->address,
1290
                                         prefix,
1291
                                         network->def->bridge,
1292
                                         forwardIf) < 0) {
1293 1294 1295
        virReportError(VIR_ERR_SYSTEM_ERROR,
                       _("failed to add iptables rule to allow forwarding to '%s'"),
                       network->def->bridge);
1296 1297 1298
        goto masqerr2;
    }

1299 1300 1301 1302 1303
    /*
     * Enable masquerading.
     *
     * We need to end up with 3 rules in the table in this order
     *
E
Eric Blake 已提交
1304 1305
     *  1. protocol=tcp with sport mapping restriction
     *  2. protocol=udp with sport mapping restriction
1306 1307 1308
     *  3. generic any protocol
     *
     * The sport mappings are required, because default IPtables
E
Eric Blake 已提交
1309
     * MASQUERADE maintain port numbers unchanged where possible.
1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322
     *
     * NFS can be configured to only "trust" port numbers < 1023.
     *
     * Guests using NAT thus need to be prevented from having port
     * numbers < 1023, otherwise they can bypass the NFS "security"
     * check on the source port number.
     *
     * Since we use '--insert' to add rules to the header of the
     * chain, we actually need to add them in the reverse of the
     * order just mentioned !
     */

    /* First the generic masquerade rule for other protocols */
1323
    if (iptablesAddForwardMasquerade(driver->iptables,
1324
                                     &ipdef->address,
1325
                                     prefix,
1326
                                     forwardIf,
1327
                                     NULL) < 0) {
1328 1329 1330 1331 1332
        virReportError(VIR_ERR_SYSTEM_ERROR,
                       forwardIf ?
                       _("failed to add iptables rule to enable masquerading to %s") :
                       _("failed to add iptables rule to enable masquerading"),
                       forwardIf);
1333 1334 1335
        goto masqerr3;
    }

1336
    /* UDP with a source port restriction */
1337
    if (iptablesAddForwardMasquerade(driver->iptables,
1338
                                     &ipdef->address,
1339
                                     prefix,
1340
                                     forwardIf,
1341
                                     "udp") < 0) {
1342 1343 1344 1345 1346
        virReportError(VIR_ERR_SYSTEM_ERROR,
                       forwardIf ?
                       _("failed to add iptables rule to enable UDP masquerading to %s") :
                       _("failed to add iptables rule to enable UDP masquerading"),
                       forwardIf);
1347 1348 1349 1350
        goto masqerr4;
    }

    /* TCP with a source port restriction */
1351
    if (iptablesAddForwardMasquerade(driver->iptables,
1352
                                     &ipdef->address,
1353
                                     prefix,
1354
                                     forwardIf,
1355
                                     "tcp") < 0) {
1356 1357 1358 1359 1360
        virReportError(VIR_ERR_SYSTEM_ERROR,
                       forwardIf ?
                       _("failed to add iptables rule to enable TCP masquerading to %s") :
                       _("failed to add iptables rule to enable TCP masquerading"),
                       forwardIf);
1361 1362 1363
        goto masqerr5;
    }

1364
    return 0;
1365

1366 1367
 masqerr5:
    iptablesRemoveForwardMasquerade(driver->iptables,
1368
                                    &ipdef->address,
1369
                                    prefix,
1370
                                    forwardIf,
1371 1372 1373
                                    "udp");
 masqerr4:
    iptablesRemoveForwardMasquerade(driver->iptables,
1374
                                    &ipdef->address,
1375
                                    prefix,
1376
                                    forwardIf,
1377
                                    NULL);
1378 1379
 masqerr3:
    iptablesRemoveForwardAllowRelatedIn(driver->iptables,
1380
                                        &ipdef->address,
1381
                                        prefix,
1382
                                        network->def->bridge,
1383
                                        forwardIf);
1384 1385
 masqerr2:
    iptablesRemoveForwardAllowOut(driver->iptables,
1386
                                  &ipdef->address,
1387
                                  prefix,
1388
                                  network->def->bridge,
1389
                                  forwardIf);
1390
 masqerr1:
1391
    return -1;
1392 1393
}

1394 1395 1396 1397 1398 1399
/* networkRemoveMasqueradingIptablesRules:
 *
 * Remove the rules added by networkAddMasqueradingIptablesRules() for
 * the address in @ipdef: the tcp, udp and generic masquerade rules,
 * then the two forwarding rules.  Does nothing when the prefix of
 * @ipdef cannot be determined.  Results of the removals are ignored.
 */
static void
networkRemoveMasqueradingIptablesRules(struct network_driver *driver,
                                       virNetworkObjPtr network,
                                       virNetworkIpDefPtr ipdef)
{
    /* removal order: tcp, udp, then the protocol-less generic rule */
    const char *protocols[] = { "tcp", "udp", NULL };
    unsigned int p;
    int netPrefix = virNetworkIpDefPrefix(ipdef);
    const char *outDev = virNetworkDefForwardIf(network->def, 0);

    if (netPrefix < 0)
        return;

    for (p = 0; p < sizeof(protocols) / sizeof(protocols[0]); p++) {
        iptablesRemoveForwardMasquerade(driver->iptables,
                                        &ipdef->address,
                                        netPrefix,
                                        outDev,
                                        protocols[p]);
    }

    iptablesRemoveForwardAllowRelatedIn(driver->iptables,
                                        &ipdef->address,
                                        netPrefix,
                                        network->def->bridge,
                                        outDev);
    iptablesRemoveForwardAllowOut(driver->iptables,
                                  &ipdef->address,
                                  netPrefix,
                                  network->def->bridge,
                                  outDev);
}

1432
static int
1433
networkAddRoutingIptablesRules(struct network_driver *driver,
1434
                               virNetworkObjPtr network,
1435 1436
                               virNetworkIpDefPtr ipdef)
{
1437
    int prefix = virNetworkIpDefPrefix(ipdef);
1438
    const char *forwardIf = virNetworkDefForwardIf(network->def, 0);
1439 1440

    if (prefix < 0) {
1441 1442 1443
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Invalid prefix or netmask for '%s'"),
                       network->def->bridge);
1444 1445
        goto routeerr1;
    }
1446

1447
    /* allow routing packets from the bridge interface */
1448
    if (iptablesAddForwardAllowOut(driver->iptables,
1449
                                   &ipdef->address,
1450
                                   prefix,
1451
                                   network->def->bridge,
1452
                                   forwardIf) < 0) {
1453 1454 1455
        virReportError(VIR_ERR_SYSTEM_ERROR,
                       _("failed to add iptables rule to allow routing from '%s'"),
                       network->def->bridge);
1456 1457 1458 1459
        goto routeerr1;
    }

    /* allow routing packets to the bridge interface */
1460
    if (iptablesAddForwardAllowIn(driver->iptables,
1461
                                  &ipdef->address,
1462
                                  prefix,
1463
                                  network->def->bridge,
1464
                                  forwardIf) < 0) {
1465 1466 1467
        virReportError(VIR_ERR_SYSTEM_ERROR,
                       _("failed to add iptables rule to allow routing to '%s'"),
                       network->def->bridge);
1468 1469 1470
        goto routeerr2;
    }

1471
    return 0;
1472

1473
routeerr2:
1474
    iptablesRemoveForwardAllowOut(driver->iptables,
1475
                                  &ipdef->address,
1476
                                  prefix,
1477
                                  network->def->bridge,
1478
                                  forwardIf);
1479
routeerr1:
1480
    return -1;
1481 1482
}

1483 1484 1485 1486 1487 1488
/* networkRemoveRoutingIptablesRules:
 *
 * Remove the two rules added by networkAddRoutingIptablesRules() for
 * the address in @ipdef, inbound rule first.  Does nothing when the
 * prefix of @ipdef cannot be determined.  Results of the removals are
 * ignored.
 */
static void
networkRemoveRoutingIptablesRules(struct network_driver *driver,
                                  virNetworkObjPtr network,
                                  virNetworkIpDefPtr ipdef)
{
    int netPrefix = virNetworkIpDefPrefix(ipdef);
    const char *outDev = virNetworkDefForwardIf(network->def, 0);

    if (netPrefix < 0)
        return;

    iptablesRemoveForwardAllowIn(driver->iptables,
                                 &ipdef->address,
                                 netPrefix,
                                 network->def->bridge,
                                 outDev);

    iptablesRemoveForwardAllowOut(driver->iptables,
                                  &ipdef->address,
                                  netPrefix,
                                  network->def->bridge,
                                  outDev);
}

1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518
/* networkAddGeneralIp6tablesRules:
 *
 * Add the once-per-network IPv6 rules for @network: reject forwarding
 * to and from the bridge, allow traffic between guests on the bridge,
 * and accept DNS (port 53) over tcp and udp.  A network without any
 * IPv6 address gets no rules.  If one rule cannot be added, the ones
 * added so far by this call are removed again.
 *
 * Returns 0 on success, -1 on failure.
 */
static int
networkAddGeneralIp6tablesRules(struct network_driver *driver,
                               virNetworkObjPtr network)
{
    const char *bridge;

    if (virNetworkDefGetIpByIndex(network->def, AF_INET6, 0) == NULL)
        return 0;

    bridge = network->def->bridge;

    /* catch-all rules that block forwarding to/from the bridge */
    if (iptablesAddForwardRejectOut(driver->iptables, AF_INET6, bridge) < 0) {
        virReportError(VIR_ERR_SYSTEM_ERROR,
                       _("failed to add ip6tables rule to block outbound traffic from '%s'"),
                       network->def->bridge);
        return -1;
    }

    if (iptablesAddForwardRejectIn(driver->iptables, AF_INET6, bridge) < 0) {
        virReportError(VIR_ERR_SYSTEM_ERROR,
                       _("failed to add ip6tables rule to block inbound traffic to '%s'"),
                       network->def->bridge);
        goto undo_reject_out;
    }

    /* guests on the same bridge may talk to each other */
    if (iptablesAddForwardAllowCross(driver->iptables, AF_INET6, bridge) < 0) {
        virReportError(VIR_ERR_SYSTEM_ERROR,
                       _("failed to add ip6tables rule to allow cross bridge traffic on '%s'"),
                       network->def->bridge);
        goto undo_reject_in;
    }

    /* DNS over IPv6, tcp then udp */
    if (iptablesAddTcpInput(driver->iptables, AF_INET6, bridge, 53) < 0) {
        virReportError(VIR_ERR_SYSTEM_ERROR,
                       _("failed to add ip6tables rule to allow DNS requests from '%s'"),
                       network->def->bridge);
        goto undo_allow_cross;
    }

    if (iptablesAddUdpInput(driver->iptables, AF_INET6, bridge, 53) < 0) {
        virReportError(VIR_ERR_SYSTEM_ERROR,
                       _("failed to add ip6tables rule to allow DNS requests from '%s'"),
                       network->def->bridge);
        goto undo_tcp_dns;
    }

    return 0;

    /* each label removes the rule added just before the failing step
     * and falls through to the removals for the earlier steps
     */
undo_tcp_dns:
    iptablesRemoveTcpInput(driver->iptables, AF_INET6, bridge, 53);
undo_allow_cross:
    iptablesRemoveForwardAllowCross(driver->iptables, AF_INET6, bridge);
undo_reject_in:
    iptablesRemoveForwardRejectIn(driver->iptables, AF_INET6, bridge);
undo_reject_out:
    iptablesRemoveForwardRejectOut(driver->iptables, AF_INET6, bridge);
    return -1;
}

/* networkRemoveGeneralIp6tablesRules:
 *
 * Remove the once-per-network IPv6 rules that
 * networkAddGeneralIp6tablesRules() adds for @network, in the reverse
 * of the order in which they are added: the udp and tcp DNS (port 53)
 * input rules, then the cross-bridge allow rule and the two reject
 * rules.  A network without any IPv6 address has no such rules, so
 * nothing is done for it.  Results of the removals are ignored.
 */
static void
networkRemoveGeneralIp6tablesRules(struct network_driver *driver,
                                  virNetworkObjPtr network)
{
    if (!virNetworkDefGetIpByIndex(network->def, AF_INET6, 0))
        return;

    /* the DNS input rules are added together with the forwarding
     * rules, so they have to be taken out here as well or they would
     * be left behind
     */
    iptablesRemoveUdpInput(driver->iptables, AF_INET6, network->def->bridge, 53);
    iptablesRemoveTcpInput(driver->iptables, AF_INET6, network->def->bridge, 53);

    iptablesRemoveForwardAllowCross(driver->iptables, AF_INET6, network->def->bridge);
    iptablesRemoveForwardRejectIn(driver->iptables, AF_INET6, network->def->bridge);
    iptablesRemoveForwardRejectOut(driver->iptables, AF_INET6, network->def->bridge);
}

1586
static int
1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600
networkAddGeneralIptablesRules(struct network_driver *driver,
                               virNetworkObjPtr network)
{
    int ii;
    virNetworkIpDefPtr ipv4def;

    /* First look for first IPv4 address that has dhcp or tftpboot defined. */
    /* We support dhcp config on 1 IPv4 interface only. */
    for (ii = 0;
         (ipv4def = virNetworkDefGetIpByIndex(network->def, AF_INET, ii));
         ii++) {
        if (ipv4def->nranges || ipv4def->nhosts || ipv4def->tftproot)
            break;
    }
1601 1602

    /* allow DHCP requests through to dnsmasq */
1603

1604 1605
    if (iptablesAddTcpInput(driver->iptables, AF_INET,
                            network->def->bridge, 67) < 0) {
1606 1607 1608
        virReportError(VIR_ERR_SYSTEM_ERROR,
                       _("failed to add iptables rule to allow DHCP requests from '%s'"),
                       network->def->bridge);
1609 1610 1611
        goto err1;
    }

1612 1613
    if (iptablesAddUdpInput(driver->iptables, AF_INET,
                            network->def->bridge, 67) < 0) {
1614 1615 1616
        virReportError(VIR_ERR_SYSTEM_ERROR,
                       _("failed to add iptables rule to allow DHCP requests from '%s'"),
                       network->def->bridge);
1617 1618 1619
        goto err2;
    }

1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630
    /* If we are doing local DHCP service on this network, attempt to
     * add a rule that will fixup the checksum of DHCP response
     * packets back to the guests (but report failure without
     * aborting, since not all iptables implementations support it).
     */

    if (ipv4def && (ipv4def->nranges || ipv4def->nhosts) &&
        (iptablesAddOutputFixUdpChecksum(driver->iptables,
                                         network->def->bridge, 68) < 0)) {
        VIR_WARN("Could not add rule to fixup DHCP response checksums "
                 "on network '%s'.", network->def->name);
1631
        VIR_WARN("May need to update iptables package & kernel to support CHECKSUM rule.");
1632 1633
    }

1634
    /* allow DNS requests through to dnsmasq */
1635 1636
    if (iptablesAddTcpInput(driver->iptables, AF_INET,
                            network->def->bridge, 53) < 0) {
1637 1638 1639
        virReportError(VIR_ERR_SYSTEM_ERROR,
                       _("failed to add iptables rule to allow DNS requests from '%s'"),
                       network->def->bridge);
1640 1641 1642
        goto err3;
    }

1643 1644
    if (iptablesAddUdpInput(driver->iptables, AF_INET,
                            network->def->bridge, 53) < 0) {
1645 1646 1647
        virReportError(VIR_ERR_SYSTEM_ERROR,
                       _("failed to add iptables rule to allow DNS requests from '%s'"),
                       network->def->bridge);
1648 1649 1650
        goto err4;
    }

1651 1652
    /* allow TFTP requests through to dnsmasq if necessary */
    if (ipv4def && ipv4def->tftproot &&
1653 1654
        iptablesAddUdpInput(driver->iptables, AF_INET,
                            network->def->bridge, 69) < 0) {
1655 1656 1657
        virReportError(VIR_ERR_SYSTEM_ERROR,
                       _("failed to add iptables rule to allow TFTP requests from '%s'"),
                       network->def->bridge);
1658
        goto err5;
1659 1660
    }

1661 1662
    /* Catch all rules to block forwarding to/from bridges */

1663 1664
    if (iptablesAddForwardRejectOut(driver->iptables, AF_INET,
                                    network->def->bridge) < 0) {
1665 1666 1667
        virReportError(VIR_ERR_SYSTEM_ERROR,
                       _("failed to add iptables rule to block outbound traffic from '%s'"),
                       network->def->bridge);
1668
        goto err6;
1669 1670
    }

1671 1672
    if (iptablesAddForwardRejectIn(driver->iptables, AF_INET,
                                   network->def->bridge) < 0) {
1673 1674 1675
        virReportError(VIR_ERR_SYSTEM_ERROR,
                       _("failed to add iptables rule to block inbound traffic to '%s'"),
                       network->def->bridge);
1676
        goto err7;
1677 1678 1679
    }

    /* Allow traffic between guests on the same bridge */
1680 1681
    if (iptablesAddForwardAllowCross(driver->iptables, AF_INET,
                                     network->def->bridge) < 0) {
1682 1683 1684
        virReportError(VIR_ERR_SYSTEM_ERROR,
                       _("failed to add iptables rule to allow cross bridge traffic on '%s'"),
                       network->def->bridge);
1685
        goto err8;
1686 1687
    }

1688 1689 1690 1691 1692
    /* add IPv6 general rules, if needed */
    if (networkAddGeneralIp6tablesRules(driver, network) < 0) {
        goto err9;
    }

1693
    return 0;
1694

1695
    /* unwind in reverse order from the point of failure */
1696 1697
err9:
    iptablesRemoveForwardAllowCross(driver->iptables, AF_INET, network->def->bridge);
1698
err8:
1699
    iptablesRemoveForwardRejectIn(driver->iptables, AF_INET, network->def->bridge);
1700
err7:
1701
    iptablesRemoveForwardRejectOut(driver->iptables, AF_INET, network->def->bridge);
1702 1703
err6:
    if (ipv4def && ipv4def->tftproot) {
1704
        iptablesRemoveUdpInput(driver->iptables, AF_INET, network->def->bridge, 69);
1705
    }
1706
err5:
1707
    iptablesRemoveUdpInput(driver->iptables, AF_INET, network->def->bridge, 53);
1708
err4:
1709
    iptablesRemoveTcpInput(driver->iptables, AF_INET, network->def->bridge, 53);
1710
err3:
1711
    iptablesRemoveUdpInput(driver->iptables, AF_INET, network->def->bridge, 67);
1712
err2:
1713
    iptablesRemoveTcpInput(driver->iptables, AF_INET, network->def->bridge, 67);
1714
err1:
1715
    return -1;
1716 1717 1718
}

static void
1719 1720 1721 1722 1723
networkRemoveGeneralIptablesRules(struct network_driver *driver,
                                  virNetworkObjPtr network)
{
    int ii;
    virNetworkIpDefPtr ipv4def;
1724

1725 1726
    networkRemoveGeneralIp6tablesRules(driver, network);

1727 1728 1729 1730 1731
    for (ii = 0;
         (ipv4def = virNetworkDefGetIpByIndex(network->def, AF_INET, ii));
         ii++) {
        if (ipv4def->nranges || ipv4def->nhosts || ipv4def->tftproot)
            break;
1732
    }
1733

1734 1735 1736
    iptablesRemoveForwardAllowCross(driver->iptables, AF_INET, network->def->bridge);
    iptablesRemoveForwardRejectIn(driver->iptables, AF_INET, network->def->bridge);
    iptablesRemoveForwardRejectOut(driver->iptables, AF_INET, network->def->bridge);
1737
    if (ipv4def && ipv4def->tftproot) {
1738
        iptablesRemoveUdpInput(driver->iptables, AF_INET, network->def->bridge, 69);
1739
    }
1740 1741
    iptablesRemoveUdpInput(driver->iptables, AF_INET, network->def->bridge, 53);
    iptablesRemoveTcpInput(driver->iptables, AF_INET, network->def->bridge, 53);
1742 1743 1744 1745
    if (ipv4def && (ipv4def->nranges || ipv4def->nhosts)) {
        iptablesRemoveOutputFixUdpChecksum(driver->iptables,
                                           network->def->bridge, 68);
    }
1746 1747
    iptablesRemoveUdpInput(driver->iptables, AF_INET, network->def->bridge, 67);
    iptablesRemoveTcpInput(driver->iptables, AF_INET, network->def->bridge, 67);
1748 1749
}

1750 1751 1752 1753 1754
/*
 * Add the iptables rules that depend on a single IP definition of the
 * network.  Returns the result of the rule-adding helper that was
 * selected, or 0 when the forward type needs no address-specific rules.
 */
static int
networkAddIpSpecificIptablesRules(struct network_driver *driver,
                                  virNetworkObjPtr network,
                                  virNetworkIpDefPtr ipdef)
{
    switch (network->def->forwardType) {
    case VIR_NETWORK_FORWARD_NAT:
        /* NB: IPv6 has no NAT, so an IPv6 address on a NAT network
         * gets routing rules instead of masquerading rules.
         */
        if (VIR_SOCKET_ADDR_IS_FAMILY(&ipdef->address, AF_INET))
            return networkAddMasqueradingIptablesRules(driver, network, ipdef);
        if (VIR_SOCKET_ADDR_IS_FAMILY(&ipdef->address, AF_INET6))
            return networkAddRoutingIptablesRules(driver, network, ipdef);
        break;

    case VIR_NETWORK_FORWARD_ROUTE:
        return networkAddRoutingIptablesRules(driver, network, ipdef);

    default:
        break;
    }

    return 0;
}

/*
 * Remove the iptables rules that depend on a single IP definition of
 * the network; the counterpart of networkAddIpSpecificIptablesRules().
 */
static void
networkRemoveIpSpecificIptablesRules(struct network_driver *driver,
                                     virNetworkObjPtr network,
                                     virNetworkIpDefPtr ipdef)
{
    switch (network->def->forwardType) {
    case VIR_NETWORK_FORWARD_NAT:
        /* IPv4 addresses on a NAT network use masquerading rules,
         * IPv6 addresses use routing rules.
         */
        if (VIR_SOCKET_ADDR_IS_FAMILY(&ipdef->address, AF_INET))
            networkRemoveMasqueradingIptablesRules(driver, network, ipdef);
        else if (VIR_SOCKET_ADDR_IS_FAMILY(&ipdef->address, AF_INET6))
            networkRemoveRoutingIptablesRules(driver, network, ipdef);
        break;

    case VIR_NETWORK_FORWARD_ROUTE:
        networkRemoveRoutingIptablesRules(driver, network, ipdef);
        break;

    default:
        break;
    }
}

/*
 * Add every iptables rule a network needs: the general per-network
 * rules first, then the address-specific rules for each IP definition.
 *
 * Returns 0 on success.  On failure, every rule added so far is removed
 * again and -1 is returned.
 */
static int
networkAddIptablesRules(struct network_driver *driver,
                        virNetworkObjPtr network)
{
    virNetworkIpDefPtr ipdef;
    int idx = 0;

    /* Rules that are added once per network */
    if (networkAddGeneralIptablesRules(driver, network) < 0)
        return -1;

    /* Rules that are added once per IP address */
    while ((ipdef = virNetworkDefGetIpByIndex(network->def, AF_UNSPEC, idx))) {
        if (networkAddIpSpecificIptablesRules(driver, network, ipdef) < 0)
            goto rollback;
        idx++;
    }
    return 0;

rollback:
    /* The failed call to networkAddIpSpecificIptablesRules will have
     * cleaned up after itself, so only the addresses processed before
     * it (indexes idx-1 down to 0) still need their rules removed.
     */
    for (idx--; idx >= 0; idx--) {
        ipdef = virNetworkDefGetIpByIndex(network->def, AF_UNSPEC, idx);
        if (!ipdef)
            break;
        networkRemoveIpSpecificIptablesRules(driver, network, ipdef);
    }
    networkRemoveGeneralIptablesRules(driver, network);
    return -1;
}

/*
 * Remove every iptables rule belonging to a network: the
 * address-specific rules of each IP definition, followed by the
 * general per-network rules.
 */
static void
networkRemoveIptablesRules(struct network_driver *driver,
                           virNetworkObjPtr network)
{
    virNetworkIpDefPtr ipdef;
    int idx = 0;

    while ((ipdef = virNetworkDefGetIpByIndex(network->def, AF_UNSPEC, idx))) {
        networkRemoveIpSpecificIptablesRules(driver, network, ipdef);
        idx++;
    }

    networkRemoveGeneralIptablesRules(driver, network);
}

1836 1837 1838 1839 1840
static void
networkReloadIptablesRules(struct network_driver *driver)
{
    unsigned int i;

1841
    VIR_INFO("Reloading iptables rules");
1842 1843

    for (i = 0 ; i < driver->networks.count ; i++) {
1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855
        virNetworkObjPtr network = driver->networks.objs[i];

        virNetworkObjLock(network);
        if (virNetworkObjIsActive(network) &&
            ((network->def->forwardType == VIR_NETWORK_FORWARD_NONE) ||
             (network->def->forwardType == VIR_NETWORK_FORWARD_NAT) ||
             (network->def->forwardType == VIR_NETWORK_FORWARD_ROUTE))) {
            /* Only the three L3 network types that are configured by libvirt
             * need to have iptables rules reloaded.
             */
            networkRemoveIptablesRules(driver, network);
            if (networkAddIptablesRules(driver, network) < 0) {
1856 1857
                /* failed to add but already logged */
            }
1858
        }
1859
        virNetworkObjUnlock(network);
1860 1861 1862
    }
}

1863
/*
 * Turn on kernel IP forwarding for the requested address families by
 * writing "1" to the matching /proc/sys entries.  IPv6 is only
 * attempted if the IPv4 step (when requested) returned 0.
 *
 * Returns 0 for success, otherwise the result of the failing
 * virFileWriteStr() call.
 */
static int
networkEnableIpForwarding(bool enableIPv4, bool enableIPv6)
{
    if (enableIPv4) {
        int rc = virFileWriteStr("/proc/sys/net/ipv4/ip_forward", "1\n", 0);

        if (rc != 0)
            return rc;
    }

    if (enableIPv6)
        return virFileWriteStr("/proc/sys/net/ipv6/conf/all/forwarding",
                               "1\n", 0);

    return 0;
}

1875 1876
#define SYSCTL_PATH "/proc/sys"

1877 1878
static int
networkSetIPv6Sysctls(virNetworkObjPtr network)
1879 1880 1881 1882
{
    char *field = NULL;
    int ret = -1;

1883 1884 1885 1886 1887 1888 1889 1890 1891
    if (!virNetworkDefGetIpByIndex(network->def, AF_INET6, 0)) {
        /* Only set disable_ipv6 if there are no ipv6 addresses defined for
         * the network.
         */
        if (virAsprintf(&field, SYSCTL_PATH "/net/ipv6/conf/%s/disable_ipv6",
                        network->def->bridge) < 0) {
            virReportOOMError();
            goto cleanup;
        }
1892

1893 1894 1895 1896 1897 1898
        if (access(field, W_OK) < 0 && errno == ENOENT) {
            VIR_DEBUG("ipv6 appears to already be disabled on %s",
                      network->def->bridge);
            ret = 0;
            goto cleanup;
        }
1899

1900 1901 1902 1903 1904 1905 1906
        if (virFileWriteStr(field, "1", 0) < 0) {
            virReportSystemError(errno,
                                 _("cannot write to %s to disable IPv6 on bridge %s"),
                                 field, network->def->bridge);
            goto cleanup;
        }
        VIR_FREE(field);
1907 1908
    }

1909 1910 1911 1912 1913 1914 1915 1916 1917
    /* The rest of the ipv6 sysctl tunables should always be set,
     * whether or not we're using ipv6 on this bridge.
     */

    /* Prevent guests from hijacking the host network by sending out
     * their own router advertisements.
     */
    if (virAsprintf(&field, SYSCTL_PATH "/net/ipv6/conf/%s/accept_ra",
                    network->def->bridge) < 0) {
1918
        virReportOOMError();
1919 1920 1921
        goto cleanup;
    }

1922
    if (virFileWriteStr(field, "0", 0) < 0) {
1923
        virReportSystemError(errno,
1924 1925 1926 1927 1928
                             _("cannot disable %s"), field);
        goto cleanup;
    }
    VIR_FREE(field);

1929 1930 1931 1932 1933
    /* All interfaces used as a gateway (which is what this is, by
     * definition), must always have autoconf=0.
     */
    if (virAsprintf(&field, SYSCTL_PATH "/net/ipv6/conf/%s/autoconf",
                    network->def->bridge) < 0) {
1934
        virReportOOMError();
1935 1936 1937
        goto cleanup;
    }

1938
    if (virFileWriteStr(field, "0", 0) < 0) {
1939
        virReportSystemError(errno,
1940
                             _("cannot disable %s"), field);
1941 1942 1943 1944 1945 1946 1947 1948 1949
        goto cleanup;
    }

    ret = 0;
cleanup:
    VIR_FREE(field);
    return ret;
}

1950 1951 1952 1953 1954 1955
#define PROC_NET_ROUTE "/proc/net/route"

/* XXX: This function can be a lot more exhaustive, there are certainly
 *      other scenarios where we can ruin host network connectivity.
 * XXX: Using a proper library is preferred over parsing /proc
 */
1956 1957
static int
networkCheckRouteCollision(virNetworkObjPtr network)
1958
{
1959
    int ret = 0, len;
1960 1961 1962 1963 1964
    char *cur, *buf = NULL;
    enum {MAX_ROUTE_SIZE = 1024*64};

    /* Read whole routing table into memory */
    if ((len = virFileReadAll(PROC_NET_ROUTE, MAX_ROUTE_SIZE, &buf)) < 0)
1965
        goto out;
1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983

    /* Dropping the last character shouldn't hurt */
    if (len > 0)
        buf[len-1] = '\0';

    VIR_DEBUG("%s output:\n%s", PROC_NET_ROUTE, buf);

    if (!STRPREFIX (buf, "Iface"))
        goto out;

    /* First line is just headings, skip it */
    cur = strchr(buf, '\n');
    if (cur)
        cur++;

    while (cur) {
        char iface[17], dest[128], mask[128];
        unsigned int addr_val, mask_val;
1984 1985
        virNetworkIpDefPtr ipdef;
        int num, ii;
1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013

        /* NUL-terminate the line, so sscanf doesn't go beyond a newline.  */
        char *nl = strchr(cur, '\n');
        if (nl) {
            *nl++ = '\0';
        }

        num = sscanf(cur, "%16s %127s %*s %*s %*s %*s %*s %127s",
                     iface, dest, mask);
        cur = nl;

        if (num != 3) {
            VIR_DEBUG("Failed to parse %s", PROC_NET_ROUTE);
            continue;
        }

        if (virStrToLong_ui(dest, NULL, 16, &addr_val) < 0) {
            VIR_DEBUG("Failed to convert network address %s to uint", dest);
            continue;
        }

        if (virStrToLong_ui(mask, NULL, 16, &mask_val) < 0) {
            VIR_DEBUG("Failed to convert network mask %s to uint", mask);
            continue;
        }

        addr_val &= mask_val;

2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031
        for (ii = 0;
             (ipdef = virNetworkDefGetIpByIndex(network->def, AF_INET, ii));
             ii++) {

            unsigned int net_dest;
            virSocketAddr netmask;

            if (virNetworkIpDefNetmask(ipdef, &netmask) < 0) {
                VIR_WARN("Failed to get netmask of '%s'",
                         network->def->bridge);
                continue;
            }

            net_dest = (ipdef->address.data.inet4.sin_addr.s_addr &
                        netmask.data.inet4.sin_addr.s_addr);

            if ((net_dest == addr_val) &&
                (netmask.data.inet4.sin_addr.s_addr == mask_val)) {
2032 2033 2034
                virReportError(VIR_ERR_INTERNAL_ERROR,
                               _("Network is already in use by interface %s"),
                               iface);
2035 2036 2037
                ret = -1;
                goto out;
            }
2038 2039 2040 2041 2042 2043 2044 2045
        }
    }

out:
    VIR_FREE(buf);
    return ret;
}

2046
static int
D
Daniel P. Berrange 已提交
2047
networkAddAddrToBridge(virNetworkObjPtr network,
2048
                       virNetworkIpDefPtr ipdef)
2049
{
2050 2051 2052
    int prefix = virNetworkIpDefPrefix(ipdef);

    if (prefix < 0) {
2053 2054 2055
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("bridge '%s' has an invalid netmask or IP address"),
                       network->def->bridge);
2056 2057 2058
        return -1;
    }

2059 2060
    if (virNetDevSetIPv4Address(network->def->bridge,
                                &ipdef->address, prefix) < 0)
2061 2062 2063 2064 2065 2066
        return -1;

    return 0;
}

static int
2067
networkStartNetworkVirtual(struct network_driver *driver,
2068 2069
                          virNetworkObjPtr network)
{
2070
    int ii;
2071
    bool v4present = false, v6present = false;
2072 2073
    virErrorPtr save_err = NULL;
    virNetworkIpDefPtr ipdef;
2074
    char *macTapIfName = NULL;
2075
    int tapfd = -1;
2076

2077 2078
    /* Check to see if any network IP collides with an existing route */
    if (networkCheckRouteCollision(network) < 0)
2079 2080
        return -1;

2081
    /* Create and configure the bridge device */
2082
    if (virNetDevBridgeCreate(network->def->bridge) < 0)
2083 2084
        return -1;

2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096
    if (network->def->mac_specified) {
        /* To set a mac for the bridge, we need to define a dummy tap
         * device, set its mac, then attach it to the bridge. As long
         * as its mac address is lower than any other interface that
         * gets attached, the bridge will always maintain this mac
         * address.
         */
        macTapIfName = networkBridgeDummyNicName(network->def->bridge);
        if (!macTapIfName) {
            virReportOOMError();
            goto err0;
        }
2097
        /* Keep tun fd open and interface up to allow for IPv6 DAD to happen */
2098
        if (virNetDevTapCreateInBridgePort(network->def->bridge,
2099
                                           &macTapIfName, &network->def->mac,
2100 2101 2102 2103
                                           NULL, &tapfd, NULL, NULL,
                                           VIR_NETDEV_TAP_CREATE_USE_MAC_FOR_BRIDGE |
                                           VIR_NETDEV_TAP_CREATE_IFUP |
                                           VIR_NETDEV_TAP_CREATE_PERSIST) < 0) {
2104 2105 2106 2107 2108
            VIR_FREE(macTapIfName);
            goto err0;
        }
    }

2109
    /* Set bridge options */
2110 2111 2112 2113

    /* delay is configured in seconds, but virNetDevBridgeSetSTPDelay
     * expects milliseconds
     */
2114
    if (virNetDevBridgeSetSTPDelay(network->def->bridge,
2115
                                   network->def->delay * 1000) < 0)
2116
        goto err1;
2117

2118
    if (virNetDevBridgeSetSTP(network->def->bridge,
2119
                              network->def->stp ? true : false) < 0)
2120
        goto err1;
2121

2122 2123 2124 2125
    /* Disable IPv6 on the bridge if there are no IPv6 addresses
     * defined, and set other IPv6 sysctl tunables appropriately.
     */
    if (networkSetIPv6Sysctls(network) < 0)
2126
        goto err1;
2127

2128 2129 2130 2131 2132 2133 2134
    /* Add "once per network" rules */
    if (networkAddIptablesRules(driver, network) < 0)
        goto err1;

    for (ii = 0;
         (ipdef = virNetworkDefGetIpByIndex(network->def, AF_UNSPEC, ii));
         ii++) {
2135
        if (VIR_SOCKET_ADDR_IS_FAMILY(&ipdef->address, AF_INET))
2136
            v4present = true;
2137
        if (VIR_SOCKET_ADDR_IS_FAMILY(&ipdef->address, AF_INET6))
2138
            v6present = true;
2139

2140
        /* Add the IP address/netmask to the bridge */
D
Daniel P. Berrange 已提交
2141
        if (networkAddAddrToBridge(network, ipdef) < 0) {
2142
            goto err2;
2143
        }
2144 2145
    }

2146
    /* Bring up the bridge interface */
2147
    if (virNetDevSetOnline(network->def->bridge, 1) < 0)
2148
        goto err2;
2149

2150
    /* If forwardType != NONE, turn on global IP forwarding */
2151
    if (network->def->forwardType != VIR_NETWORK_FORWARD_NONE &&
2152
        networkEnableIpForwarding(v4present, v6present) < 0) {
2153
        virReportSystemError(errno, "%s",
2154
                             _("failed to enable IP forwarding"));
2155
        goto err3;
2156 2157
    }

2158

2159 2160
    /* start dnsmasq if there are any IP addresses (v4 or v6) */
    if ((v4present || v6present) && networkStartDhcpDaemon(network) < 0)
2161
        goto err3;
2162

2163 2164 2165 2166
    /* start radvd if there are any ipv6 addresses */
    if (v6present && networkStartRadvd(network) < 0)
        goto err4;

2167 2168 2169 2170 2171 2172 2173 2174 2175
    /* DAD has happened (dnsmasq waits for it), dnsmasq is now bound to the
     * bridge's IPv6 address, so we can now set the dummy tun down.
     */
    if (tapfd >= 0) {
        if (virNetDevSetOnline(macTapIfName, false) < 0)
            goto err4;
        VIR_FORCE_CLOSE(tapfd);
    }

2176
    if (virNetDevBandwidthSet(network->def->bridge, network->def->bandwidth) < 0) {
2177 2178 2179
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("cannot set bandwidth limits on %s"),
                       network->def->bridge);
2180 2181 2182
        goto err5;
    }

2183
    VIR_FREE(macTapIfName);
2184 2185 2186

    return 0;

2187
 err5:
2188
    virNetDevBandwidthClear(network->def->bridge);
2189

2190 2191 2192 2193
 err4:
    if (!save_err)
        save_err = virSaveLastError();

2194 2195 2196 2197 2198
    if (network->dnsmasqPid > 0) {
        kill(network->dnsmasqPid, SIGTERM);
        network->dnsmasqPid = -1;
    }

2199 2200 2201
 err3:
    if (!save_err)
        save_err = virSaveLastError();
2202
    ignore_value(virNetDevSetOnline(network->def->bridge, 0));
2203

2204 2205 2206 2207 2208 2209
 err2:
    if (!save_err)
        save_err = virSaveLastError();
    networkRemoveIptablesRules(driver, network);

 err1:
2210 2211 2212
    if (!save_err)
        save_err = virSaveLastError();

H
Hu Tao 已提交
2213
    if (macTapIfName) {
2214
        VIR_FORCE_CLOSE(tapfd);
H
Hu Tao 已提交
2215 2216 2217
        ignore_value(virNetDevTapDelete(macTapIfName));
        VIR_FREE(macTapIfName);
    }
2218 2219

 err0:
2220 2221
    if (!save_err)
        save_err = virSaveLastError();
2222
    ignore_value(virNetDevBridgeDelete(network->def->bridge));
2223

2224 2225 2226 2227
    if (save_err) {
        virSetError(save_err);
        virFreeError(save_err);
    }
2228 2229 2230
    return -1;
}

2231
static int networkShutdownNetworkVirtual(struct network_driver *driver,
2232 2233
                                        virNetworkObjPtr network)
{
2234
    virNetDevBandwidthClear(network->def->bridge);
2235

2236 2237 2238 2239 2240 2241 2242 2243
    if (network->radvdPid > 0) {
        char *radvdpidbase;

        kill(network->radvdPid, SIGTERM);
        /* attempt to delete the pidfile we created */
        if (!(radvdpidbase = networkRadvdPidfileBasename(network->def->name))) {
            virReportOOMError();
        } else {
2244
            virPidFileDelete(NETWORK_PID_DIR, radvdpidbase);
2245 2246 2247 2248
            VIR_FREE(radvdpidbase);
        }
    }

2249 2250 2251
    if (network->dnsmasqPid > 0)
        kill(network->dnsmasqPid, SIGTERM);

2252
    if (network->def->mac_specified) {
2253
        char *macTapIfName = networkBridgeDummyNicName(network->def->bridge);
2254 2255 2256
        if (!macTapIfName) {
            virReportOOMError();
        } else {
2257
            ignore_value(virNetDevTapDelete(macTapIfName));
2258 2259 2260 2261
            VIR_FREE(macTapIfName);
        }
    }

2262
    ignore_value(virNetDevSetOnline(network->def->bridge, 0));
2263

2264 2265
    networkRemoveIptablesRules(driver, network);

2266
    ignore_value(virNetDevBridgeDelete(network->def->bridge));
2267

2268
    /* See if its still alive and really really kill it */
2269
    if (network->dnsmasqPid > 0 &&
2270
        (kill(network->dnsmasqPid, 0) == 0))
2271 2272
        kill(network->dnsmasqPid, SIGKILL);
    network->dnsmasqPid = -1;
2273 2274 2275 2276 2277 2278

    if (network->radvdPid > 0 &&
        (kill(network->radvdPid, 0) == 0))
        kill(network->radvdPid, SIGKILL);
    network->radvdPid = -1;

2279 2280 2281 2282 2283 2284 2285 2286
    return 0;
}

/*
 * Start hook for networks of type BRIDGE, PRIVATE, VEPA, HOSTDEV or
 * PASSTHROUGH.  Nothing needs to be done for them at present.
 *
 * Anything added here must undo its own work and return -1 on failure,
 * and return 0 on success.
 */
static int
networkStartNetworkExternal(struct network_driver *driver ATTRIBUTE_UNUSED,
                            virNetworkObjPtr network ATTRIBUTE_UNUSED)
{
    return 0;
}

/*
 * Shutdown hook for networks of type BRIDGE, PRIVATE, VEPA, HOSTDEV or
 * PASSTHROUGH.  Nothing needs to be done for them at present.
 *
 * Anything added here must undo its own work and return -1 on failure,
 * and return 0 on success.
 */
static int
networkShutdownNetworkExternal(struct network_driver *driver ATTRIBUTE_UNUSED,
                               virNetworkObjPtr network ATTRIBUTE_UNUSED)
{
    return 0;
}

/*
 * Start a network: switch it to a transient live definition, run the
 * start routine matching its forward type, then persist the live
 * status to NETWORK_STATE_DIR.
 *
 * Returns 0 on success and a negative value on failure.  It is an
 * error (VIR_ERR_OPERATION_INVALID) to start a network that is already
 * active.  If saving the status fails, the freshly started network is
 * shut down again and the original error and errno are restored.
 */
static int
networkStartNetwork(struct network_driver *driver,
                    virNetworkObjPtr network)
{
    int ret = 0;

    if (virNetworkObjIsActive(network)) {
        virReportError(VIR_ERR_OPERATION_INVALID,
                       "%s", _("network is already active"));
        return -1;
    }

    if (virNetworkObjSetDefTransient(network, true) < 0)
        return -1;

    switch (network->def->forwardType) {

    case VIR_NETWORK_FORWARD_NONE:
    case VIR_NETWORK_FORWARD_NAT:
    case VIR_NETWORK_FORWARD_ROUTE:
        ret = networkStartNetworkVirtual(driver, network);
        break;

    case VIR_NETWORK_FORWARD_BRIDGE:
    case VIR_NETWORK_FORWARD_PRIVATE:
    case VIR_NETWORK_FORWARD_VEPA:
    case VIR_NETWORK_FORWARD_PASSTHROUGH:
    case VIR_NETWORK_FORWARD_HOSTDEV:
        ret = networkStartNetworkExternal(driver, network);
        break;
    }

    if (ret < 0) {
        /* the start routine cleaned up after itself; only the
         * transient definition is left to undo
         */
        virNetworkObjUnsetDefTransient(network);
        return ret;
    }

    /* Mark the network active before saving its status.
     * networkShutdownNetwork() returns immediately for a network that
     * virNetworkObjIsActive() reports as inactive (expected to reflect
     * this flag), so without this the rollback below would tear nothing
     * down and leave the started network and its transient definition
     * behind.
     */
    network->active = 1;

    /* Persist the live configuration now that anything autogenerated
     * is setup.
     */
    if ((ret = virNetworkSaveStatus(NETWORK_STATE_DIR, network)) < 0) {
        goto error;
    }

    VIR_INFO("Starting up network '%s'", network->def->name);

error:
    if (ret < 0) {
        /* keep the original error/errno across the shutdown */
        virErrorPtr save_err = virSaveLastError();
        int save_errno = errno;
        networkShutdownNetwork(driver, network);
        virSetError(save_err);
        virFreeError(save_err);
        errno = save_errno;
    }
    return ret;
}

/*
 * Shut down an active network: delete its status file from
 * NETWORK_STATE_DIR, run the shutdown routine matching its forward
 * type, mark it inactive and drop the transient definition.
 *
 * Returns 0 if the network is not active, -1 if the status file path
 * cannot be built, and otherwise the result of the type-specific
 * shutdown routine.
 */
static int
networkShutdownNetwork(struct network_driver *driver,
                       virNetworkObjPtr network)
{
    char *statePath;
    int rc = 0;

    VIR_INFO("Shutting down network '%s'", network->def->name);

    if (!virNetworkObjIsActive(network))
        return 0;

    if (!(statePath = virNetworkConfigFile(NETWORK_STATE_DIR,
                                           network->def->name)))
        return -1;

    unlink(statePath);
    VIR_FREE(statePath);

    switch (network->def->forwardType) {
    /* networks whose bridge is managed by libvirt */
    case VIR_NETWORK_FORWARD_NONE:
    case VIR_NETWORK_FORWARD_NAT:
    case VIR_NETWORK_FORWARD_ROUTE:
        rc = networkShutdownNetworkVirtual(driver, network);
        break;

    /* networks handled by networkShutdownNetworkExternal() */
    case VIR_NETWORK_FORWARD_BRIDGE:
    case VIR_NETWORK_FORWARD_PRIVATE:
    case VIR_NETWORK_FORWARD_VEPA:
    case VIR_NETWORK_FORWARD_PASSTHROUGH:
    case VIR_NETWORK_FORWARD_HOSTDEV:
        rc = networkShutdownNetworkExternal(driver, network);
        break;
    }

    network->active = 0;
    virNetworkObjUnsetDefTransient(network);

    return rc;
}


2405 2406 2407 2408 2409
/*
 * Look up a network by UUID and return a public virNetworkPtr for it.
 * Reports VIR_ERR_NO_NETWORK and returns NULL when no network matches.
 * A network object that was found is unlocked before returning.
 */
static virNetworkPtr
networkLookupByUUID(virConnectPtr conn,
                    const unsigned char *uuid)
{
    struct network_driver *driver = conn->networkPrivateData;
    virNetworkObjPtr obj;
    virNetworkPtr net;

    networkDriverLock(driver);
    obj = virNetworkFindByUUID(&driver->networks, uuid);
    networkDriverUnlock(driver);

    if (!obj) {
        virReportError(VIR_ERR_NO_NETWORK,
                       "%s", _("no network with matching uuid"));
        return NULL;
    }

    net = virGetNetwork(conn, obj->def->name, obj->def->uuid);
    virNetworkObjUnlock(obj);

    return net;
}

2428 2429 2430 2431 2432 2433
/*
 * Look up a network by name and return a public virNetworkPtr for it.
 * Reports VIR_ERR_NO_NETWORK and returns NULL when no network matches.
 * A network object that was found is unlocked before returning.
 */
static virNetworkPtr
networkLookupByName(virConnectPtr conn,
                    const char *name)
{
    struct network_driver *driver = conn->networkPrivateData;
    virNetworkObjPtr obj;
    virNetworkPtr net;

    networkDriverLock(driver);
    obj = virNetworkFindByName(&driver->networks, name);
    networkDriverUnlock(driver);

    if (!obj) {
        virReportError(VIR_ERR_NO_NETWORK,
                       _("no network with matching name '%s'"), name);
        return NULL;
    }

    net = virGetNetwork(conn, obj->def->name, obj->def->uuid);
    virNetworkObjUnlock(obj);

    return net;
}

/*
 * Attach the network driver to a connection.  Only the VIR_CONNECT_RO
 * flag is accepted.  Returns VIR_DRV_OPEN_DECLINED when the driver
 * state does not exist, otherwise stores the driver state in the
 * connection and returns VIR_DRV_OPEN_SUCCESS.
 */
static virDrvOpenStatus
networkOpenNetwork(virConnectPtr conn,
                   virConnectAuthPtr auth ATTRIBUTE_UNUSED,
                   unsigned int flags)
{
    virCheckFlags(VIR_CONNECT_RO, VIR_DRV_OPEN_ERROR);

    if (driverState) {
        conn->networkPrivateData = driverState;
        return VIR_DRV_OPEN_SUCCESS;
    }

    return VIR_DRV_OPEN_DECLINED;
}

/* Detach the network driver state from a connection; always returns 0. */
static int
networkCloseNetwork(virConnectPtr conn)
{
    conn->networkPrivateData = NULL;

    return 0;
}

/*
 * Count the active networks known to the driver.  The driver lock is
 * held for the whole walk and each network object is locked while it
 * is inspected.
 */
static int
networkNumNetworks(virConnectPtr conn)
{
    struct network_driver *driver = conn->networkPrivateData;
    int total = 0;
    int idx;

    networkDriverLock(driver);

    for (idx = 0; idx < driver->networks.count; idx++) {
        virNetworkObjPtr obj = driver->networks.objs[idx];

        virNetworkObjLock(obj);
        if (virNetworkObjIsActive(obj))
            total++;
        virNetworkObjUnlock(obj);
    }

    networkDriverUnlock(driver);

    return total;
}

/*
 * Store copies of the names of up to nnames active networks in names[].
 *
 * Returns the number of names stored.  If a name cannot be duplicated,
 * an OOM error is reported, the names stored so far are freed and -1
 * is returned.
 */
static int
networkListNetworks(virConnectPtr conn, char **const names, int nnames)
{
    struct network_driver *driver = conn->networkPrivateData;
    int count = 0;
    int idx;

    networkDriverLock(driver);

    for (idx = 0; idx < driver->networks.count && count < nnames; idx++) {
        virNetworkObjPtr obj = driver->networks.objs[idx];

        virNetworkObjLock(obj);

        if (virNetworkObjIsActive(obj)) {
            names[count] = strdup(obj->def->name);
            if (!names[count]) {
                virNetworkObjUnlock(obj);
                virReportOOMError();
                goto cleanup;
            }
            count++;
        }

        virNetworkObjUnlock(obj);
    }

    networkDriverUnlock(driver);

    return count;

 cleanup:
    networkDriverUnlock(driver);

    /* release every name that was handed out before the failure */
    while (count-- > 0)
        VIR_FREE(names[count]);

    return -1;
}

/*
 * Count the inactive networks known to the driver.  The driver lock is
 * held for the whole walk and each network object is locked while it
 * is inspected.
 */
static int
networkNumDefinedNetworks(virConnectPtr conn)
{
    struct network_driver *driver = conn->networkPrivateData;
    int total = 0;
    int idx;

    networkDriverLock(driver);

    for (idx = 0; idx < driver->networks.count; idx++) {
        virNetworkObjPtr obj = driver->networks.objs[idx];

        virNetworkObjLock(obj);
        if (!virNetworkObjIsActive(obj))
            total++;
        virNetworkObjUnlock(obj);
    }

    networkDriverUnlock(driver);

    return total;
}

/*
 * Store copies of the names of up to nnames inactive networks in
 * names[].
 *
 * Returns the number of names stored.  If a name cannot be duplicated,
 * an OOM error is reported, the names stored so far are freed and -1
 * is returned.
 */
static int
networkListDefinedNetworks(virConnectPtr conn, char **const names, int nnames)
{
    struct network_driver *driver = conn->networkPrivateData;
    int count = 0;
    int idx;

    networkDriverLock(driver);

    for (idx = 0; idx < driver->networks.count && count < nnames; idx++) {
        virNetworkObjPtr obj = driver->networks.objs[idx];

        virNetworkObjLock(obj);

        if (!virNetworkObjIsActive(obj)) {
            names[count] = strdup(obj->def->name);
            if (!names[count]) {
                virNetworkObjUnlock(obj);
                virReportOOMError();
                goto cleanup;
            }
            count++;
        }

        virNetworkObjUnlock(obj);
    }

    networkDriverUnlock(driver);

    return count;

 cleanup:
    networkDriverUnlock(driver);

    /* release every name that was handed out before the failure */
    while (count-- > 0)
        VIR_FREE(names[count]);

    return -1;
}

2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571
/*
 * List networks through virNetworkList() while holding the driver
 * lock.  Only flags within VIR_CONNECT_LIST_NETWORKS_FILTERS_ALL are
 * accepted; any other flag makes the function return -1.
 *
 * Returns whatever virNetworkList() returns.
 */
static int
networkListAllNetworks(virConnectPtr conn,
                       virNetworkPtr **nets,
                       unsigned int flags)
{
    struct network_driver *driver = conn->networkPrivateData;
    int nfound;

    virCheckFlags(VIR_CONNECT_LIST_NETWORKS_FILTERS_ALL, -1);

    networkDriverLock(driver);
    nfound = virNetworkList(conn, driver->networks, nets, flags);
    networkDriverUnlock(driver);

    return nfound;
}
2572 2573 2574

/*
 * Report whether the network identified by net->uuid is active.
 *
 * Returns the result of virNetworkObjIsActive() for the network, or -1
 * with VIR_ERR_NO_NETWORK reported when no network has that UUID.
 */
static int networkIsActive(virNetworkPtr net)
{
    struct network_driver *driver = net->conn->networkPrivateData;
    virNetworkObjPtr obj;
    int ret = -1;

    networkDriverLock(driver);
    obj = virNetworkFindByUUID(&driver->networks, net->uuid);
    networkDriverUnlock(driver);
    if (!obj) {
        /* give the error a message, as networkLookupByUUID() does */
        virReportError(VIR_ERR_NO_NETWORK,
                       "%s", _("no network with matching uuid"));
        goto cleanup;
    }
    ret = virNetworkObjIsActive(obj);

cleanup:
    if (obj)
        virNetworkObjUnlock(obj);
    return ret;
}

/*
 * Report whether the network identified by net->uuid is persistent.
 *
 * Returns the network object's persistent flag, or -1 with
 * VIR_ERR_NO_NETWORK reported when no network has that UUID.
 */
static int networkIsPersistent(virNetworkPtr net)
{
    struct network_driver *driver = net->conn->networkPrivateData;
    virNetworkObjPtr obj;
    int ret = -1;

    networkDriverLock(driver);
    obj = virNetworkFindByUUID(&driver->networks, net->uuid);
    networkDriverUnlock(driver);
    if (!obj) {
        /* give the error a message, as networkLookupByUUID() does */
        virReportError(VIR_ERR_NO_NETWORK,
                       "%s", _("no network with matching uuid"));
        goto cleanup;
    }
    ret = obj->persistent;

cleanup:
    if (obj)
        virNetworkObjUnlock(obj);
    return ret;
}


2616 2617 2618 2619
/* networkValidate:
 * @def: network definition to check
 *
 * Performs two checks on @def:
 *  - at most one <portgroup> may be flagged as the default;
 *  - vlan tags (on the network or on a portgroup) are accepted only
 *    when the network is bridge-forwarded and the virtualport that
 *    applies is of type Open vSwitch.
 *
 * NOTE(review): the previous comment here also listed hostdev SR-IOV
 * pools as supporting vlan configuration, but nothing in this
 * function accepts them - confirm which is intended.
 *
 * Returns 0 when the definition passes, -1 after reporting
 * VIR_ERR_CONFIG_UNSUPPORTED otherwise.
 */
static int
networkValidate(virNetworkDefPtr def)
{
    const bool bridged = (def->forwardType == VIR_NETWORK_FORWARD_BRIDGE);
    const bool netHasVlan = def->vlan.nTags > 0;
    bool netVlanOk = false;
    bool vlanMisuse = false;
    virPortGroupDefPtr firstDefault = NULL;
    int ii;

    /* vlan at network level is fine only for an Open vSwitch bridge */
    if (bridged && def->virtPortProfile &&
        def->virtPortProfile->virtPortType == VIR_NETDEV_VPORT_PROFILE_OPENVSWITCH)
        netVlanOk = true;

    for (ii = 0; ii < def->nPortGroups; ii++) {
        virPortGroupDefPtr pg = &def->portGroups[ii];

        if (netHasVlan || pg->vlan.nTags > 0) {
            /* Users of this portgroup end up with a vlan tag. When the
             * portgroup carries its own virtualport, that one decides;
             * otherwise the network-level virtualport is what counts.
             */
            bool pgVlanOk = netVlanOk;

            if (pg->virtPortProfile) {
                pgVlanOk = (bridged &&
                            pg->virtPortProfile->virtPortType
                            == VIR_NETDEV_VPORT_PROFILE_OPENVSWITCH);
            }
            if (!pgVlanOk)
                vlanMisuse = true;
        }

        if (!pg->isDefault)
            continue;

        if (firstDefault) {
            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                           _("network '%s' has multiple default "
                             "<portgroup> elements (%s and %s), "
                             "but only one default is allowed"),
                           def->name, firstDefault->name, pg->name);
            return -1;
        }
        firstDefault = pg;
    }

    /* With a default portgroup present, the network-level
     * "vlan used but not allowed" test is skipped: the loop above
     * already evaluated every portgroup, the default one included.
     */
    if (!vlanMisuse && !(netHasVlan && !netVlanOk && !firstDefault))
        return 0;

    virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                   _("<vlan> element specified for network %s, "
                     "whose type doesn't support vlan configuration"),
                   def->name);
    return -1;
}

2680
/* networkCreate:
 * @conn: connection whose networkPrivateData holds the driver state
 * @xml: network description, parsed with virNetworkDefParseString()
 *
 * Parses @xml, registers the definition as a non-live object in the
 * driver's network list and starts it. If starting fails the object
 * is removed from the list again.
 *
 * Returns the handle produced by virGetNetwork(), or NULL if any
 * step failed. The driver lock is held for the whole call.
 */
static virNetworkPtr networkCreate(virConnectPtr conn, const char *xml) {
    struct network_driver *driver = conn->networkPrivateData;
    virNetworkDefPtr parsed;
    virNetworkObjPtr obj = NULL;
    virNetworkPtr created = NULL;
    bool managedL3;

    networkDriverLock(driver);

    parsed = virNetworkDefParseString(xml);
    if (parsed == NULL)
        goto cleanup;

    if (virNetworkObjIsDuplicate(&driver->networks, parsed, 1) < 0)
        goto cleanup;

    /* A bridge device name / mac address is only filled in for the
     * three L3 forward types that libvirt configures itself.
     */
    managedL3 = (parsed->forwardType == VIR_NETWORK_FORWARD_NONE ||
                 parsed->forwardType == VIR_NETWORK_FORWARD_NAT ||
                 parsed->forwardType == VIR_NETWORK_FORWARD_ROUTE);
    if (managedL3) {
        if (virNetworkSetBridgeName(&driver->networks, parsed, 1) != 0)
            goto cleanup;

        virNetworkSetBridgeMacAddr(parsed);
    }

    if (networkValidate(parsed) < 0)
        goto cleanup;

    /* "live" is passed as false: this transient network has not been
     * started yet.
     */
    obj = virNetworkAssignDef(&driver->networks, parsed, false);
    if (obj == NULL)
        goto cleanup;
    /* cleared so the cleanup path does not free a definition that is
     * now referenced by the object */
    parsed = NULL;

    if (networkStartNetwork(driver, obj) < 0) {
        virNetworkRemoveInactive(&driver->networks, obj);
        obj = NULL;
        goto cleanup;
    }

    VIR_INFO("Creating network '%s'", obj->def->name);
    created = virGetNetwork(conn, obj->def->name, obj->def->uuid);

cleanup:
    virNetworkDefFree(parsed);
    if (obj)
        virNetworkObjUnlock(obj);
    networkDriverUnlock(driver);
    return created;
}

/* networkDefine:
 * @conn: connection whose networkPrivateData holds the driver state
 * @xml: network description, parsed with virNetworkDefParseString()
 *
 * Parses @xml, checks it (duplicate check, at most one IPv4 address
 * with dhcp ranges/hosts, networkValidate()), registers it in the
 * driver's network list, marks the object persistent and saves its
 * config under driver->networkConfigDir. When an IPv4 dhcp section
 * is present, the dnsmasq hosts file is written as well.
 *
 * Returns the handle produced by virGetNetwork(), or NULL if any
 * step failed. The driver lock is held for the whole call.
 */
static virNetworkPtr networkDefine(virConnectPtr conn, const char *xml) {
    struct network_driver *driver = conn->networkPrivateData;
    virNetworkDefPtr def;
    virNetworkObjPtr obj = NULL;
    virNetworkPtr result = NULL;
    virNetworkIpDefPtr ip;
    virNetworkIpDefPtr dhcpIp = NULL;
    dnsmasqContext *dnsctx = NULL;
    bool ownDef = true;
    int idx;

    networkDriverLock(driver);

    def = virNetworkDefParseString(xml);
    if (!def)
        goto cleanup;

    if (virNetworkObjIsDuplicate(&driver->networks, def, 0) < 0)
        goto cleanup;

    /* A bridge device name / mac address is only filled in for the
     * three L3 forward types that libvirt configures itself.
     */
    if (def->forwardType == VIR_NETWORK_FORWARD_NONE ||
        def->forwardType == VIR_NETWORK_FORWARD_NAT ||
        def->forwardType == VIR_NETWORK_FORWARD_ROUTE) {

        if (virNetworkSetBridgeName(&driver->networks, def, 1) != 0)
            goto cleanup;

        virNetworkSetBridgeMacAddr(def);
    }

    /* dhcp is supported on a single IPv4 address per network: remember
     * the first one that has ranges or hosts, reject a second one.
     */
    for (idx = 0; ; idx++) {
        ip = virNetworkDefGetIpByIndex(def, AF_UNSPEC, idx);
        if (!ip)
            break;

        if (!VIR_SOCKET_ADDR_IS_FAMILY(&ip->address, AF_INET))
            continue;
        if (!ip->nranges && !ip->nhosts)
            continue;

        if (dhcpIp) {
            virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
                           _("Multiple dhcp sections found. "
                             "dhcp is supported only for a "
                             "single IPv4 address on each network"));
            goto cleanup;
        }
        dhcpIp = ip;
    }

    if (networkValidate(def) < 0)
        goto cleanup;

    obj = virNetworkAssignDef(&driver->networks, def, false);
    if (!obj)
        goto cleanup;
    /* from here on the definition is referenced by the object, so the
     * cleanup path must no longer free it */
    ownDef = false;

    obj->persistent = 1;

    if (virNetworkSaveConfig(driver->networkConfigDir, def) < 0) {
        virNetworkRemoveInactive(&driver->networks, obj);
        obj = NULL;
        goto cleanup;
    }

    if (dhcpIp) {
        dnsctx = dnsmasqContextNew(def->name, DNSMASQ_STATE_DIR);
        if (dnsctx == NULL)
            goto cleanup;
        if (networkBuildDnsmasqHostsfile(dnsctx, dhcpIp, def->dns) < 0)
            goto cleanup;
        if (dnsmasqSave(dnsctx) < 0)
            goto cleanup;
    }

    VIR_INFO("Defining network '%s'", def->name);
    result = virGetNetwork(conn, def->name, def->uuid);

cleanup:
    if (ownDef)
        virNetworkDefFree(def);
    dnsmasqContextFree(dnsctx);
    if (obj)
        virNetworkObjUnlock(obj);
    networkDriverUnlock(driver);
    return result;
}

/* networkUndefine:
 * @net: public network handle; the object is looked up by net->uuid
 *
 * Deletes the config of an inactive network, removes the dnsmasq
 * files and lease file (if an IPv4 address had dhcp ranges/hosts)
 * and the radvd config/pid files (if an IPv6 address was present),
 * then drops the object from the driver's network list.
 *
 * Returns 0 on success, -1 on failure. Note that a failure after
 * virNetworkDeleteConfig() leaves the object in the list even though
 * its config has already been deleted.
 */
static int networkUndefine(virNetworkPtr net) {
    struct network_driver *driver = net->conn->networkPrivateData;
    virNetworkObjPtr obj;
    virNetworkIpDefPtr ip;
    bool hasDhcp = false;
    bool hasIPv6 = false;
    int idx;
    int ret = -1;

    networkDriverLock(driver);

    obj = virNetworkFindByUUID(&driver->networks, net->uuid);
    if (obj == NULL) {
        virReportError(VIR_ERR_NO_NETWORK,
                       "%s", _("no network with matching uuid"));
        goto cleanup;
    }

    if (virNetworkObjIsActive(obj)) {
        virReportError(VIR_ERR_OPERATION_INVALID,
                       "%s", _("network is still active"));
        goto cleanup;
    }

    if (virNetworkDeleteConfig(driver->networkConfigDir,
                               driver->networkAutostartDir,
                               obj) < 0)
        goto cleanup;

    /* Walk every configured address to learn which helper files may
     * exist: dhcp state for IPv4, radvd state for IPv6.
     */
    for (idx = 0; ; idx++) {
        ip = virNetworkDefGetIpByIndex(obj->def, AF_UNSPEC, idx);
        if (!ip)
            break;

        if (VIR_SOCKET_ADDR_IS_FAMILY(&ip->address, AF_INET)) {
            if (ip->nranges || ip->nhosts)
                hasDhcp = true;
        } else if (VIR_SOCKET_ADDR_IS_FAMILY(&ip->address, AF_INET6)) {
            hasIPv6 = true;
        }
    }

    if (hasDhcp) {
        dnsmasqContext *dnsctx;
        char *leases;

        dnsctx = dnsmasqContextNew(obj->def->name, DNSMASQ_STATE_DIR);
        if (!dnsctx)
            goto cleanup;

        dnsmasqDelete(dnsctx);
        dnsmasqContextFree(dnsctx);

        leases = networkDnsmasqLeaseFileName(obj->def->name);
        if (!leases)
            goto cleanup;
        /* best effort: the result of unlink() is not checked */
        unlink(leases);
        VIR_FREE(leases);
    }

    if (hasIPv6) {
        char *radvdConf;
        char *radvdPidBase;

        radvdConf = networkRadvdConfigFileName(obj->def->name);
        if (!radvdConf) {
            virReportOOMError();
            goto cleanup;
        }
        /* best effort: the result of unlink() is not checked */
        unlink(radvdConf);
        VIR_FREE(radvdConf);

        radvdPidBase = networkRadvdPidfileBasename(obj->def->name);
        if (!radvdPidBase) {
            virReportOOMError();
            goto cleanup;
        }
        virPidFileDelete(NETWORK_PID_DIR, radvdPidBase);
        VIR_FREE(radvdPidBase);
    }

    VIR_INFO("Undefining network '%s'", obj->def->name);
    virNetworkRemoveInactive(&driver->networks, obj);
    /* the object must not be unlocked after it has been removed */
    obj = NULL;
    ret = 0;

cleanup:
    if (obj)
        virNetworkObjUnlock(obj);
    networkDriverUnlock(driver);
    return ret;
}

2910 2911 2912 2913 2914 2915 2916 2917 2918 2919
static int
networkUpdate(virNetworkPtr net,
              unsigned int command,
              unsigned int section,
              int parentIndex,
              const char *xml,
              unsigned int flags)
{
    struct network_driver *driver = net->conn->networkPrivateData;
    virNetworkObjPtr network = NULL;
2920 2921 2922 2923
    int isActive, ret = -1, ii;
    virNetworkIpDefPtr ipdef;
    bool oldDhcpActive = false;

2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937

    virCheckFlags(VIR_NETWORK_UPDATE_AFFECT_LIVE |
                  VIR_NETWORK_UPDATE_AFFECT_CONFIG,
                  -1);

    networkDriverLock(driver);

    network = virNetworkFindByUUID(&driver->networks, net->uuid);
    if (!network) {
        virReportError(VIR_ERR_NO_NETWORK,
                       "%s", _("no network with matching uuid"));
        goto cleanup;
    }

2938 2939 2940 2941 2942 2943 2944 2945 2946 2947
    /* see if we are listening for dhcp pre-modification */
    for (ii = 0;
         (ipdef = virNetworkDefGetIpByIndex(network->def, AF_INET, ii));
         ii++) {
        if (ipdef->nranges || ipdef->nhosts) {
            oldDhcpActive = true;
            break;
        }
    }

2948 2949 2950 2951
    /* VIR_NETWORK_UPDATE_AFFECT_CURRENT means "change LIVE if network
     * is active, else change CONFIG
    */
    isActive = virNetworkObjIsActive(network);
2952 2953
    if ((flags & (VIR_NETWORK_UPDATE_AFFECT_LIVE |
                  VIR_NETWORK_UPDATE_AFFECT_CONFIG)) ==
2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988
        VIR_NETWORK_UPDATE_AFFECT_CURRENT) {
        if (isActive)
            flags |= VIR_NETWORK_UPDATE_AFFECT_LIVE;
        else
            flags |= VIR_NETWORK_UPDATE_AFFECT_CONFIG;
    }

    /* update the network config in memory/on disk */
    if (virNetworkObjUpdate(network, command, section, parentIndex, xml, flags) < 0)
        goto cleanup;

    if (flags & VIR_NETWORK_UPDATE_AFFECT_CONFIG) {
        /* save updated persistent config to disk */
        if (virNetworkSaveConfig(driver->networkConfigDir,
                                 virNetworkObjGetPersistentDef(network)) < 0) {
            goto cleanup;
        }
    }

    if (isActive && (flags & VIR_NETWORK_UPDATE_AFFECT_LIVE)) {
        /* rewrite dnsmasq host files, restart dnsmasq, update iptables
         * rules, etc, according to which section was modified. Note that
         * some sections require multiple actions, so a single switch
         * statement is inadequate.
         */
        if (section == VIR_NETWORK_SECTION_BRIDGE ||
            section == VIR_NETWORK_SECTION_DOMAIN ||
            section == VIR_NETWORK_SECTION_IP ||
            section == VIR_NETWORK_SECTION_IP_DHCP_RANGE) {
            /* these sections all change things on the dnsmasq commandline,
             * so we need to kill and restart dnsmasq.
             */
            if (networkRestartDhcpDaemon(network) < 0)
                goto cleanup;

2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012
        } else if (section == VIR_NETWORK_SECTION_IP_DHCP_HOST) {
            /* if we previously weren't listening for dhcp and now we
             * are (or vice-versa) then we need to do a restart,
             * otherwise we just need to do a refresh (redo the config
             * files and send SIGHUP)
             */
            bool newDhcpActive = false;

            for (ii = 0;
                 (ipdef = virNetworkDefGetIpByIndex(network->def, AF_INET, ii));
                 ii++) {
                if (ipdef->nranges || ipdef->nhosts) {
                    newDhcpActive = true;
                    break;
                }
            }

            if ((newDhcpActive != oldDhcpActive &&
                networkRestartDhcpDaemon(network) < 0) ||
                networkRefreshDhcpDaemon(network) < 0) {
                goto cleanup;
            }

        } else if (section == VIR_NETWORK_SECTION_DNS_HOST ||
3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031
                   section == VIR_NETWORK_SECTION_DNS_TXT ||
                   section == VIR_NETWORK_SECTION_DNS_SRV) {
            /* these sections only change things in config files, so we
             * can just update the config files and send SIGHUP to
             * dnsmasq.
             */
            if (networkRefreshDhcpDaemon(network) < 0)
                goto cleanup;

        }

        if (section == VIR_NETWORK_SECTION_IP) {
            /* only a change in IP addresses will affect radvd, and all of radvd's
             * config is stored in the conf file which will be re-read with a SIGHUP.
             */
            if (networkRefreshRadvd(network) < 0)
                goto cleanup;
        }

3032 3033 3034 3035 3036 3037
        if ((section == VIR_NETWORK_SECTION_IP ||
             section == VIR_NETWORK_SECTION_FORWARD ||
             section == VIR_NETWORK_SECTION_FORWARD_INTERFACE) &&
           (network->def->forwardType == VIR_NETWORK_FORWARD_NONE ||
            network->def->forwardType == VIR_NETWORK_FORWARD_NAT ||
            network->def->forwardType == VIR_NETWORK_FORWARD_ROUTE)) {
3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056
            /* these could affect the iptables rules */
            networkRemoveIptablesRules(driver, network);
            if (networkAddIptablesRules(driver, network) < 0)
                goto cleanup;

        }

        /* save current network state to disk */
        if ((ret = virNetworkSaveStatus(NETWORK_STATE_DIR, network)) < 0)
            goto cleanup;
    }
    ret = 0;
cleanup:
    if (network)
        virNetworkObjUnlock(network);
    networkDriverUnlock(driver);
    return ret;
}

3057
/* networkStart:
 * @net: public network handle; the object is looked up by net->uuid
 *
 * Starts the matching network via networkStartNetwork() while the
 * driver lock is held.
 *
 * Returns the result of networkStartNetwork(), or -1 after reporting
 * VIR_ERR_NO_NETWORK when the lookup fails.
 */
static int networkStart(virNetworkPtr net) {
    struct network_driver *drv = net->conn->networkPrivateData;
    virNetworkObjPtr obj;
    int rc = -1;

    networkDriverLock(drv);

    obj = virNetworkFindByUUID(&drv->networks, net->uuid);
    if (obj) {
        rc = networkStartNetwork(drv, obj);
        virNetworkObjUnlock(obj);
    } else {
        virReportError(VIR_ERR_NO_NETWORK,
                       "%s", _("no network with matching uuid"));
    }

    networkDriverUnlock(drv);
    return rc;
}

/* networkDestroy:
 * @net: public network handle; the object is looked up by net->uuid
 *
 * Shuts down an active network via networkShutdownNetwork(). A
 * non-persistent network is additionally removed from the driver's
 * network list, regardless of the shutdown result.
 *
 * Returns the result of networkShutdownNetwork(), or -1 after
 * reporting an error when the network is not found or not active.
 */
static int networkDestroy(virNetworkPtr net) {
    struct network_driver *drv = net->conn->networkPrivateData;
    virNetworkObjPtr obj;
    int rc = -1;

    networkDriverLock(drv);

    obj = virNetworkFindByUUID(&drv->networks, net->uuid);
    if (obj == NULL) {
        virReportError(VIR_ERR_NO_NETWORK,
                       "%s", _("no network with matching uuid"));
        goto cleanup;
    }

    if (!virNetworkObjIsActive(obj)) {
        virReportError(VIR_ERR_OPERATION_INVALID,
                       "%s", _("network is not active"));
        goto cleanup;
    }

    rc = networkShutdownNetwork(drv, obj);

    if (!obj->persistent) {
        virNetworkRemoveInactive(&drv->networks, obj);
        /* the object must not be unlocked after it has been removed */
        obj = NULL;
    }

cleanup:
    if (obj)
        virNetworkObjUnlock(obj);
    networkDriverUnlock(drv);
    return rc;
}

3114
/* networkGetXMLDesc:
 * @net: public network handle; the object is looked up by net->uuid
 * @flags: only VIR_NETWORK_XML_INACTIVE is accepted
 *
 * Formats a definition of the network with virNetworkDefFormat().
 * With VIR_NETWORK_XML_INACTIVE set and a pending 'newDef' present,
 * that pending definition is formatted; otherwise the object's
 * current 'def' is used.
 *
 * Returns the string from virNetworkDefFormat(), or NULL when the
 * network is not found (VIR_ERR_NO_NETWORK is reported).
 */
static char *networkGetXMLDesc(virNetworkPtr net,
                               unsigned int flags)
{
    struct network_driver *drv = net->conn->networkPrivateData;
    virNetworkObjPtr obj;
    virNetworkDefPtr chosen;
    char *xmlout = NULL;

    virCheckFlags(VIR_NETWORK_XML_INACTIVE, NULL);

    /* the driver lock is held only for the duration of the lookup */
    networkDriverLock(drv);
    obj = virNetworkFindByUUID(&drv->networks, net->uuid);
    networkDriverUnlock(drv);

    if (obj == NULL) {
        virReportError(VIR_ERR_NO_NETWORK,
                       "%s", _("no network with matching uuid"));
        return xmlout;
    }

    chosen = ((flags & VIR_NETWORK_XML_INACTIVE) && obj->newDef)
             ? obj->newDef
             : obj->def;

    xmlout = virNetworkDefFormat(chosen, flags);

    virNetworkObjUnlock(obj);
    return xmlout;
}

/* networkGetBridgeName:
 * @net: public network handle; the object is looked up by net->uuid
 *
 * Returns a newly allocated copy (strdup) of the network's bridge
 * name, or NULL after reporting an error when the network is not
 * found, has no bridge name, or the copy cannot be allocated.
 */
static char *networkGetBridgeName(virNetworkPtr net) {
    struct network_driver *driver = net->conn->networkPrivateData;
    virNetworkObjPtr network;
    char *bridge = NULL;

    /* the driver lock is held only for the duration of the lookup */
    networkDriverLock(driver);
    network = virNetworkFindByUUID(&driver->networks, net->uuid);
    networkDriverUnlock(driver);

    if (!network) {
        /* The lookup is by UUID, so say so, matching the message used
         * by the other lookups in this file. */
        virReportError(VIR_ERR_NO_NETWORK,
                       "%s", _("no network with matching uuid"));
        goto cleanup;
    }

    if (!(network->def->bridge)) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("network '%s' does not have a bridge name."),
                       network->def->name);
        goto cleanup;
    }

    bridge = strdup(network->def->bridge);
    if (!bridge)
        virReportOOMError();

cleanup:
    if (network)
        virNetworkObjUnlock(network);
    return bridge;
}

/* networkGetAutostart:
 * @net: public network handle; the object is looked up by net->uuid
 * @autostart: filled with the object's 'autostart' field on success
 *
 * Returns 0 on success, -1 after reporting VIR_ERR_NO_NETWORK when
 * the network is not found (@autostart is left untouched then).
 */
static int networkGetAutostart(virNetworkPtr net,
                             int *autostart) {
    struct network_driver *drv = net->conn->networkPrivateData;
    virNetworkObjPtr obj;

    /* the driver lock is held only for the duration of the lookup */
    networkDriverLock(drv);
    obj = virNetworkFindByUUID(&drv->networks, net->uuid);
    networkDriverUnlock(drv);

    if (obj == NULL) {
        virReportError(VIR_ERR_NO_NETWORK,
                       "%s", _("no network with matching uuid"));
        return -1;
    }

    *autostart = obj->autostart;

    virNetworkObjUnlock(obj);
    return 0;
}

/* networkSetAutostart:
 * @net: public network handle; the object is looked up by net->uuid
 * @autostart: any non-zero value enables autostart, zero disables it
 *
 * For a persistent network whose autostart setting differs from the
 * requested one, creates (or deletes) a symlink in
 * driver->networkAutostartDir pointing at the network's config file
 * in driver->networkConfigDir, then records the new setting on the
 * object. Nothing is touched on disk when the setting already
 * matches.
 *
 * Returns 0 on success, -1 after reporting an error otherwise.
 */
static int networkSetAutostart(virNetworkPtr net,
                               int autostart) {
    struct network_driver *driver = net->conn->networkPrivateData;
    virNetworkObjPtr network;
    char *cfgPath = NULL;
    char *linkPath = NULL;
    int wanted;
    int ret = -1;

    networkDriverLock(driver);

    network = virNetworkFindByUUID(&driver->networks, net->uuid);
    if (network == NULL) {
        virReportError(VIR_ERR_NO_NETWORK,
                       "%s", _("no network with matching uuid"));
        goto cleanup;
    }

    if (!network->persistent) {
        virReportError(VIR_ERR_OPERATION_INVALID,
                       "%s", _("cannot set autostart for transient network"));
        goto cleanup;
    }

    /* normalize to 0/1 before comparing with the stored setting */
    wanted = (autostart != 0);

    if (network->autostart == wanted) {
        ret = 0;
        goto cleanup;
    }

    cfgPath = virNetworkConfigFile(driver->networkConfigDir, network->def->name);
    if (cfgPath == NULL)
        goto cleanup;

    linkPath = virNetworkConfigFile(driver->networkAutostartDir, network->def->name);
    if (linkPath == NULL)
        goto cleanup;

    if (wanted) {
        if (virFileMakePath(driver->networkAutostartDir) < 0) {
            virReportSystemError(errno,
                                 _("cannot create autostart directory '%s'"),
                                 driver->networkAutostartDir);
            goto cleanup;
        }

        if (symlink(cfgPath, linkPath) < 0) {
            virReportSystemError(errno,
                                 _("Failed to create symlink '%s' to '%s'"),
                                 linkPath, cfgPath);
            goto cleanup;
        }
    } else if (unlink(linkPath) < 0 &&
               errno != ENOENT && errno != ENOTDIR) {
        /* a link that is already absent is not treated as an error */
        virReportSystemError(errno,
                             _("Failed to delete symlink '%s'"),
                             linkPath);
        goto cleanup;
    }

    network->autostart = wanted;
    ret = 0;

cleanup:
    VIR_FREE(cfgPath);
    VIR_FREE(linkPath);
    if (network)
        virNetworkObjUnlock(network);
    networkDriverUnlock(driver);
    return ret;
}


static virNetworkDriver networkDriver = {
    "Network",
3272 3273 3274 3275 3276 3277
    .open = networkOpenNetwork, /* 0.2.0 */
    .close = networkCloseNetwork, /* 0.2.0 */
    .numOfNetworks = networkNumNetworks, /* 0.2.0 */
    .listNetworks = networkListNetworks, /* 0.2.0 */
    .numOfDefinedNetworks = networkNumDefinedNetworks, /* 0.2.0 */
    .listDefinedNetworks = networkListDefinedNetworks, /* 0.2.0 */
3278
    .listAllNetworks = networkListAllNetworks, /* 0.10.2 */
3279 3280 3281 3282 3283
    .networkLookupByUUID = networkLookupByUUID, /* 0.2.0 */
    .networkLookupByName = networkLookupByName, /* 0.2.0 */
    .networkCreateXML = networkCreate, /* 0.2.0 */
    .networkDefineXML = networkDefine, /* 0.2.0 */
    .networkUndefine = networkUndefine, /* 0.2.0 */
3284
    .networkUpdate = networkUpdate, /* 0.10.2 */
3285 3286 3287 3288 3289 3290 3291 3292
    .networkCreate = networkStart, /* 0.2.0 */
    .networkDestroy = networkDestroy, /* 0.2.0 */
    .networkGetXMLDesc = networkGetXMLDesc, /* 0.2.0 */
    .networkGetBridgeName = networkGetBridgeName, /* 0.2.0 */
    .networkGetAutostart = networkGetAutostart, /* 0.2.1 */
    .networkSetAutostart = networkSetAutostart, /* 0.2.1 */
    .networkIsActive = networkIsActive, /* 0.7.3 */
    .networkIsPersistent = networkIsPersistent, /* 0.7.3 */
3293 3294 3295
};

static virStateDriver networkStateDriver = {
3296
    "Network",
3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307
    networkStartup,
    networkShutdown,
    networkReload,
    networkActive,
};

/* networkRegister:
 *
 * Registers this file's network driver table and state driver table.
 *
 * Returns 0 when both registrations succeed, -1 as soon as one of
 * them fails (a failed network driver registration means the state
 * driver is not registered either).
 *
 * NOTE(review): this relies on virRegisterNetworkDriver() and
 * virRegisterStateDriver() returning a negative value on failure -
 * confirm against their declarations.
 */
int networkRegister(void) {
    if (virRegisterNetworkDriver(&networkDriver) < 0)
        return -1;
    if (virRegisterStateDriver(&networkStateDriver) < 0)
        return -1;
    return 0;
}
3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318

/********************************************************/

/* Private API to deal with logical switch capabilities.
 * These functions are exported so that other parts of libvirt can
 * call them, but are not part of the public API and not in the
 * driver's function table. If we ever have more than one network
 * driver, we will need to present these functions via a second
 * "backend" function table.
 */

3319 3320 3321 3322 3323 3324 3325 3326 3327
/* networkCreateInterfacePool:
 * @netdef: the original NetDef from the network
 *
 * Creates an implicit interface pool of VF's when a PF dev is given.
 * The virtual functions of netdef->forwardPfs->dev are looked up and
 * one netdef->forwardIfs entry is created per VF: a netdev name for
 * the bridge/private/vepa/passthrough forward types, or a PCI address
 * for the hostdev forward type.
 *
 * If anything fails after the pool array has been allocated, the
 * partially built pool is torn down again and netdef->nForwardIfs is
 * reset to 0, so that callers never see a half-initialized pool and a
 * later call can retry.
 *
 * Returns 0 on success, -1 on failure (an error has been reported).
 */
static int
networkCreateInterfacePool(virNetworkDefPtr netdef)
{
    unsigned int num_virt_fns = 0;
    char **vfname = NULL;
    /* must start out NULL: it is passed to VIR_FREE() on every path,
     * including the one where the VF lookup itself failed */
    struct pci_config_address **virt_fns = NULL;
    bool pool_allocated = false;
    int nfilled = 0;   /* number of completely populated pool entries */
    int ret = -1, ii = 0;

    if ((virNetDevGetVirtualFunctions(netdef->forwardPfs->dev,
                                      &vfname, &virt_fns, &num_virt_fns)) < 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("Could not get Virtual functions on %s"),
                       netdef->forwardPfs->dev);
        goto finish;
    }

    if (num_virt_fns == 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("No Vf's present on SRIOV PF %s"),
                       netdef->forwardPfs->dev);
        goto finish;
    }

    if ((VIR_ALLOC_N(netdef->forwardIfs, num_virt_fns)) < 0) {
        virReportOOMError();
        goto finish;
    }
    pool_allocated = true;

    netdef->nForwardIfs = num_virt_fns;

    for (ii = 0; ii < netdef->nForwardIfs; ii++) {
        if ((netdef->forwardType == VIR_NETWORK_FORWARD_BRIDGE) ||
            (netdef->forwardType == VIR_NETWORK_FORWARD_PRIVATE) ||
            (netdef->forwardType == VIR_NETWORK_FORWARD_VEPA) ||
            (netdef->forwardType == VIR_NETWORK_FORWARD_PASSTHROUGH)) {
            /* the direct (macvtap) modes address the VF by netdev name */
            netdef->forwardIfs[ii].type = VIR_NETWORK_FORWARD_HOSTDEV_DEVICE_NETDEV;
            if (vfname[ii]) {
                netdef->forwardIfs[ii].device.dev = strdup(vfname[ii]);
                if (!netdef->forwardIfs[ii].device.dev) {
                    virReportOOMError();
                    goto finish;
                }
            } else {
                virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                               _("Direct mode types require interface names"));
                goto finish;
            }
        } else if (netdef->forwardType == VIR_NETWORK_FORWARD_HOSTDEV) {
            /* VF's are always PCI devices */
            netdef->forwardIfs[ii].type = VIR_NETWORK_FORWARD_HOSTDEV_DEVICE_PCI;
            netdef->forwardIfs[ii].device.pci.domain = virt_fns[ii]->domain;
            netdef->forwardIfs[ii].device.pci.bus = virt_fns[ii]->bus;
            netdef->forwardIfs[ii].device.pci.slot = virt_fns[ii]->slot;
            netdef->forwardIfs[ii].device.pci.function = virt_fns[ii]->function;
        }
        nfilled = ii + 1;
    }

    ret = 0;
finish:
    if (ret < 0 && pool_allocated) {
        /* Undo the partially built pool. Only the entries that were
         * completely populated can own a duplicated device name.
         */
        for (ii = 0; ii < nfilled; ii++) {
            if (netdef->forwardIfs[ii].type
                == VIR_NETWORK_FORWARD_HOSTDEV_DEVICE_NETDEV)
                VIR_FREE(netdef->forwardIfs[ii].device.dev);
        }
        VIR_FREE(netdef->forwardIfs);
        netdef->nForwardIfs = 0;
    }

    /* The lookup results are only needed while building the pool.
     * Either array may still be NULL if the lookup failed part way
     * through, even though num_virt_fns may already have been set, so
     * check before touching the elements.
     */
    for (ii = 0; ii < num_virt_fns; ii++) {
        if (vfname)
            VIR_FREE(vfname[ii]);
        if (virt_fns)
            VIR_FREE(virt_fns[ii]);
    }
    VIR_FREE(vfname);
    VIR_FREE(virt_fns);
    return ret;
}

3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407
/* networkAllocateActualDevice:
 * @iface: the original NetDef from the domain
 *
 * Looks up the network reference by iface, allocates a physical
 * device from that network (if appropriate), and returns with the
 * virDomainActualNetDef filled in accordingly. If there are no
 * changes to be made in the netdef, then just leave the actualdef
 * empty.
 *
 * For interfaces that are not of type network only the final
 * validation (currently the vlan check) is performed.
 *
 * On success the connection counter of the network, and of the
 * forward interface that was picked (if any), is incremented. On
 * failure a partially filled in iface->data.network.actual is freed
 * and reset to NULL.
 *
 * Returns 0 on success, -1 on failure.
 */
int
networkAllocateActualDevice(virDomainNetDefPtr iface)
{
    struct network_driver *driver = driverState;
    enum virDomainNetType actualType = iface->type;
    virNetworkObjPtr network = NULL;
    virNetworkDefPtr netdef = NULL;
    virPortGroupDefPtr portgroup = NULL;
    virNetDevVPortProfilePtr virtport = iface->virtPortProfile;
    virNetDevVlanPtr vlan = NULL;
    virNetworkForwardIfDefPtr dev = NULL;
    int ii;
    int ret = -1;

    /* it's handy to have this initialized if we skip directly to validate */
    if (iface->vlan.nTags > 0)
        vlan = &iface->vlan;

    if (iface->type != VIR_DOMAIN_NET_TYPE_NETWORK)
        goto validate;

    /* start from a clean slate; a stale actual def is never reused */
    virDomainActualNetDefFree(iface->data.network.actual);
    iface->data.network.actual = NULL;

    networkDriverLock(driver);
    network = virNetworkFindByName(&driver->networks, iface->data.network.name);
    networkDriverUnlock(driver);
    if (!network) {
        virReportError(VIR_ERR_NO_NETWORK,
                       _("no network with matching name '%s'"),
                       iface->data.network.name);
        goto error;
    }
    /* NOTE(review): the network object is unlocked again in cleanup,
     * so virNetworkFindByName() presumably returns it locked. */
    netdef = network->def;

    /* portgroup can be present for any type of network, in particular
     * for bandwidth information, so we need to check for that and
     * fill it in appropriately for all forward types.
     */
    portgroup = virPortGroupFindByName(netdef, iface->data.network.portgroup);

    /* If there is already interface-specific bandwidth, just use that
     * (already in NetDef). Otherwise, if there is bandwidth info in
     * the portgroup, fill that into the ActualDef.
     */
    if (portgroup && !iface->bandwidth) {
        if (!iface->data.network.actual
            && (VIR_ALLOC(iface->data.network.actual) < 0)) {
            virReportOOMError();
            goto error;
        }

        if (virNetDevBandwidthCopy(&iface->data.network.actual->bandwidth,
                                   portgroup->bandwidth) < 0)
            goto error;
    }

    if ((netdef->forwardType == VIR_NETWORK_FORWARD_NONE) ||
        (netdef->forwardType == VIR_NETWORK_FORWARD_NAT) ||
        (netdef->forwardType == VIR_NETWORK_FORWARD_ROUTE)) {
        /* for these forward types, the actual net type really *is*
         * NETWORK; we just keep the info from the portgroup in
         * iface->data.network.actual
         */
        if (iface->data.network.actual)
            iface->data.network.actual->type = VIR_DOMAIN_NET_TYPE_NETWORK;
    } else if ((netdef->forwardType == VIR_NETWORK_FORWARD_BRIDGE) &&
               netdef->bridge) {

        /* <forward type='bridge'/> <bridge name='xxx'/>
         * is VIR_DOMAIN_NET_TYPE_BRIDGE
         */

        if (!iface->data.network.actual
            && (VIR_ALLOC(iface->data.network.actual) < 0)) {
            virReportOOMError();
            goto error;
        }

        iface->data.network.actual->type = actualType = VIR_DOMAIN_NET_TYPE_BRIDGE;
        iface->data.network.actual->data.bridge.brname = strdup(netdef->bridge);
        if (!iface->data.network.actual->data.bridge.brname) {
            virReportOOMError();
            goto error;
        }

        /* merge virtualports from interface, network, and portgroup to
         * arrive at actual virtualport to use
         */
        if (virNetDevVPortProfileMerge3(&iface->data.network.actual->virtPortProfile,
                                        iface->virtPortProfile,
                                        netdef->virtPortProfile,
                                        portgroup
                                        ? portgroup->virtPortProfile : NULL) < 0) {
            goto error;
        }
        virtport = iface->data.network.actual->virtPortProfile;
        if (virtport) {
            /* only type='openvswitch' is allowed for bridges */
            if (virtport->virtPortType != VIR_NETDEV_VPORT_PROFILE_OPENVSWITCH) {
                virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                               _("<virtualport type='%s'> not supported for network "
                                 "'%s' which uses a bridge device"),
                               virNetDevVPortTypeToString(virtport->virtPortType),
                               netdef->name);
                goto error;
            }
        }

    } else if (netdef->forwardType == VIR_NETWORK_FORWARD_HOSTDEV) {

        /* <forward type='hostdev'> is VIR_DOMAIN_NET_TYPE_HOSTDEV */

        if (!iface->data.network.actual
            && (VIR_ALLOC(iface->data.network.actual) < 0)) {
            virReportOOMError();
            goto error;
        }

        iface->data.network.actual->type = actualType = VIR_DOMAIN_NET_TYPE_HOSTDEV;
        /* build the VF pool on first use if only a PF was configured */
        if (netdef->nForwardPfs > 0 && netdef->nForwardIfs <= 0 &&
            networkCreateInterfacePool(netdef) < 0) {
            goto error;
        }

        /* pick first dev with 0 connections */
        for (ii = 0; ii < netdef->nForwardIfs; ii++) {
            if (netdef->forwardIfs[ii].connections == 0) {
                dev = &netdef->forwardIfs[ii];
                break;
            }
        }
        if (!dev) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("network '%s' requires exclusive access "
                             "to interfaces, but none are available"),
                           netdef->name);
            goto error;
        }
        iface->data.network.actual->data.hostdev.def.parent.type = VIR_DOMAIN_DEVICE_NET;
        iface->data.network.actual->data.hostdev.def.parent.data.net = iface;
        iface->data.network.actual->data.hostdev.def.info = &iface->info;
        iface->data.network.actual->data.hostdev.def.mode = VIR_DOMAIN_HOSTDEV_MODE_SUBSYS;
        iface->data.network.actual->data.hostdev.def.managed = netdef->managed;
        iface->data.network.actual->data.hostdev.def.source.subsys.type = dev->type;
        iface->data.network.actual->data.hostdev.def.source.subsys.u.pci = dev->device.pci;

        /* merge virtualports from interface, network, and portgroup to
         * arrive at actual virtualport to use
         */
        if (virNetDevVPortProfileMerge3(&iface->data.network.actual->virtPortProfile,
                                        iface->virtPortProfile,
                                        netdef->virtPortProfile,
                                        portgroup
                                        ? portgroup->virtPortProfile : NULL) < 0) {
            goto error;
        }
        virtport = iface->data.network.actual->virtPortProfile;
        if (virtport) {
            /* make sure type is supported for hostdev connections */
            if (virtport->virtPortType != VIR_NETDEV_VPORT_PROFILE_8021QBG &&
                virtport->virtPortType != VIR_NETDEV_VPORT_PROFILE_8021QBH) {
                virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                               _("<virtualport type='%s'> not supported for network "
                                 "'%s' which uses an SR-IOV Virtual Function "
                                 "via PCI passthrough"),
                               virNetDevVPortTypeToString(virtport->virtPortType),
                               netdef->name);
                goto error;
            }
        }

    } else if ((netdef->forwardType == VIR_NETWORK_FORWARD_BRIDGE) ||
               (netdef->forwardType == VIR_NETWORK_FORWARD_PRIVATE) ||
               (netdef->forwardType == VIR_NETWORK_FORWARD_VEPA) ||
               (netdef->forwardType == VIR_NETWORK_FORWARD_PASSTHROUGH)) {

        /* <forward type='bridge|private|vepa|passthrough'> are all
         * VIR_DOMAIN_NET_TYPE_DIRECT.
         */

        if (!iface->data.network.actual
            && (VIR_ALLOC(iface->data.network.actual) < 0)) {
            virReportOOMError();
            goto error;
        }

        /* Set type=direct and appropriate <source mode='xxx'/> */
        iface->data.network.actual->type = actualType = VIR_DOMAIN_NET_TYPE_DIRECT;
        switch (netdef->forwardType) {
        case VIR_NETWORK_FORWARD_BRIDGE:
            iface->data.network.actual->data.direct.mode = VIR_NETDEV_MACVLAN_MODE_BRIDGE;
            break;
        case VIR_NETWORK_FORWARD_PRIVATE:
            iface->data.network.actual->data.direct.mode = VIR_NETDEV_MACVLAN_MODE_PRIVATE;
            break;
        case VIR_NETWORK_FORWARD_VEPA:
            iface->data.network.actual->data.direct.mode = VIR_NETDEV_MACVLAN_MODE_VEPA;
            break;
        case VIR_NETWORK_FORWARD_PASSTHROUGH:
            iface->data.network.actual->data.direct.mode = VIR_NETDEV_MACVLAN_MODE_PASSTHRU;
            break;
        }

        /* merge virtualports from interface, network, and portgroup to
         * arrive at actual virtualport to use
         */
        if (virNetDevVPortProfileMerge3(&iface->data.network.actual->virtPortProfile,
                                        iface->virtPortProfile,
                                        netdef->virtPortProfile,
                                        portgroup
                                        ? portgroup->virtPortProfile : NULL) < 0) {
            goto error;
        }
        virtport = iface->data.network.actual->virtPortProfile;
        if (virtport) {
            /* make sure type is supported for macvtap connections */
            if (virtport->virtPortType != VIR_NETDEV_VPORT_PROFILE_8021QBG &&
                virtport->virtPortType != VIR_NETDEV_VPORT_PROFILE_8021QBH) {
                virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                               _("<virtualport type='%s'> not supported for network "
                                 "'%s' which uses a macvtap device"),
                               virNetDevVPortTypeToString(virtport->virtPortType),
                               netdef->name);
                goto error;
            }
        }

        /* A direct mode network needs either explicitly listed forward
         * interfaces or a PF from which a pool of VFs can be created.
         */
        if ((netdef->nForwardIfs <= 0) && (netdef->nForwardPfs <= 0)) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("network '%s' uses a direct mode, but "
                             "has no forward dev and no interface pool"),
                           netdef->name);
            goto error;
        } else {
            /* pick an interface from the pool */

            if (netdef->nForwardPfs > 0 && netdef->nForwardIfs == 0 &&
                networkCreateInterfacePool(netdef) < 0) {
                goto error;
            }

            /* PASSTHROUGH mode, and PRIVATE Mode + 802.1Qbh both
             * require exclusive access to a device, so current
             * connections count must be 0.  Other modes can share, so
             * just search for the one with the lowest number of
             * connections.
             */
            if ((netdef->forwardType == VIR_NETWORK_FORWARD_PASSTHROUGH) ||
                ((netdef->forwardType == VIR_NETWORK_FORWARD_PRIVATE) &&
                 iface->data.network.actual->virtPortProfile &&
                 (iface->data.network.actual->virtPortProfile->virtPortType
                  == VIR_NETDEV_VPORT_PROFILE_8021QBH))) {

                /* pick first dev with 0 connections */
                for (ii = 0; ii < netdef->nForwardIfs; ii++) {
                    if (netdef->forwardIfs[ii].connections == 0) {
                        dev = &netdef->forwardIfs[ii];
                        break;
                    }
                }
            } else {
                /* pick least used dev */
                dev = &netdef->forwardIfs[0];
                for (ii = 1; ii < netdef->nForwardIfs; ii++) {
                    if (netdef->forwardIfs[ii].connections < dev->connections)
                        dev = &netdef->forwardIfs[ii];
                }
            }
            /* dev points at the physical device we want to use */
            if (!dev) {
                virReportError(VIR_ERR_INTERNAL_ERROR,
                               _("network '%s' requires exclusive access "
                                 "to interfaces, but none are available"),
                               netdef->name);
                goto error;
            }
            iface->data.network.actual->data.direct.linkdev = strdup(dev->device.dev);
            if (!iface->data.network.actual->data.direct.linkdev) {
                virReportOOMError();
                goto error;
            }
        }
    }

    if (virNetDevVPortProfileCheckComplete(virtport, true) < 0)
        goto error;

    /* copy appropriate vlan info to actualNet */
    if (iface->vlan.nTags > 0)
        vlan = &iface->vlan;
    else if (portgroup && portgroup->vlan.nTags > 0)
        vlan = &portgroup->vlan;
    else if (netdef && netdef->vlan.nTags > 0)
        vlan = &netdef->vlan;

    /* For NONE/NAT/ROUTE networks without portgroup bandwidth no actual
     * def has been allocated, so there is nowhere to copy the vlan to.
     * Skip the copy in that case instead of writing through a NULL
     * pointer; actualType is still NETWORK then, so the validation
     * below rejects any vlan that was requested.
     */
    if (iface->data.network.actual &&
        virNetDevVlanCopy(&iface->data.network.actual->vlan, vlan) < 0)
        goto error;

validate:
    /* make sure that everything now specified for the device is
     * actually supported on this type of network. NB: network,
     * netdef, and iface->data.network.actual may all be NULL.
     */

    if (vlan) {
        /* vlan configuration via libvirt is only supported for
         * PCI Passthrough SR-IOV devices and openvswitch bridges.
         * otherwise log an error and fail
         */
        if (!(actualType == VIR_DOMAIN_NET_TYPE_HOSTDEV ||
              (actualType == VIR_DOMAIN_NET_TYPE_BRIDGE &&
               virtport && virtport->virtPortType
               == VIR_NETDEV_VPORT_PROFILE_OPENVSWITCH))) {
            if (netdef) {
                virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                               _("an interface connecting to network '%s' "
                                 "is requesting a vlan tag, but that is not "
                                 "supported for this type of network"),
                               netdef->name);
            } else {
                virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
                               _("an interface of type '%s' "
                                 "is requesting a vlan tag, but that is not "
                                 "supported for this type of connection"),
                               virDomainNetTypeToString(iface->type));
            }
            goto error;
        }
    }

    if (dev) {
        /* we are now assured of success, so mark the allocation */
        dev->connections++;
        if (actualType != VIR_DOMAIN_NET_TYPE_HOSTDEV) {
            VIR_DEBUG("Using physical device %s, %d connections",
                      dev->device.dev, dev->connections);
        } else {
            VIR_DEBUG("Using physical device %04x:%02x:%02x.%x, connections %d",
                      dev->device.pci.domain, dev->device.pci.bus,
                      dev->device.pci.slot, dev->device.pci.function,
                      dev->connections);
        }
    }

    if (netdef) {
        netdef->connections++;
        VIR_DEBUG("Using network %s, %d connections",
                  netdef->name, netdef->connections);
    }
    ret = 0;

cleanup:
    if (network)
        virNetworkObjUnlock(network);
    return ret;

error:
    /* drop whatever was filled in so far; the union member
     * data.network is only valid for interfaces of type network */
    if (iface->type == VIR_DOMAIN_NET_TYPE_NETWORK) {
        virDomainActualNetDefFree(iface->data.network.actual);
        iface->data.network.actual = NULL;
    }
    goto cleanup;
}

/* networkNotifyActualDevice:
 * @iface:  the domain's NetDef with an "actual" device already filled in.
 *
 * Called to notify the network driver when libvirtd is restarted and
 * finds an already running domain. If appropriate it will force an
 * allocation of the actual->direct.linkdev (or, for hostdev
 * interfaces, of the matching PCI device) to get everything back in
 * order.
 *
 * Interfaces that are not of type network are ignored. For all other
 * interfaces the network's connection counter is incremented on
 * success, and so is the counter of the forward interface that the
 * domain is using when the actual type is direct or hostdev.
 *
 * Returns 0 on success, -1 on failure.
 */
int
networkNotifyActualDevice(virDomainNetDefPtr iface)
{
    struct network_driver *driver = driverState;
    enum virDomainNetType actualType = virDomainNetGetActualType(iface);
    virNetworkObjPtr network;
    virNetworkDefPtr netdef;
    virNetworkForwardIfDefPtr dev = NULL;
    int ii, ret = -1;

    if (iface->type != VIR_DOMAIN_NET_TYPE_NETWORK)
        return 0;

    networkDriverLock(driver);
    network = virNetworkFindByName(&driver->networks, iface->data.network.name);
    networkDriverUnlock(driver);
    if (!network) {
        virReportError(VIR_ERR_NO_NETWORK,
                       _("no network with matching name '%s'"),
                       iface->data.network.name);
        goto error;
    }
    netdef = network->def;

    /* only direct and hostdev connections occupy a forward interface */
    if (!iface->data.network.actual ||
        (actualType != VIR_DOMAIN_NET_TYPE_DIRECT &&
         actualType != VIR_DOMAIN_NET_TYPE_HOSTDEV)) {
        VIR_DEBUG("Nothing to claim from network %s", iface->data.network.name);
        goto success;
    }

    /* the pool of VFs may not have been built yet for this network */
    if (netdef->nForwardPfs > 0 && netdef->nForwardIfs == 0 &&
        networkCreateInterfacePool(netdef) < 0) {
        goto error;
    }
    if (netdef->nForwardIfs == 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("network '%s' uses a direct or hostdev mode, "
                         "but has no forward dev and no interface pool"),
                       netdef->name);
        goto error;
    }

    if (actualType == VIR_DOMAIN_NET_TYPE_DIRECT) {
        const char *actualDev;

        actualDev = virDomainNetGetActualDirectDev(iface);
        if (!actualDev) {
            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                           _("the interface uses a direct mode, "
                             "but has no source dev"));
            goto error;
        }

        /* find the matching interface and increment its connections */
        for (ii = 0; ii < netdef->nForwardIfs; ii++) {
            if (netdef->forwardIfs[ii].type
                == VIR_NETWORK_FORWARD_HOSTDEV_DEVICE_NETDEV &&
                STREQ(actualDev, netdef->forwardIfs[ii].device.dev)) {
                dev = &netdef->forwardIfs[ii];
                break;
            }
        }
        /* dev points at the physical device we want to use */
        if (!dev) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("network '%s' doesn't have dev='%s' "
                             "in use by domain"),
                           netdef->name, actualDev);
            goto error;
        }

        /* PASSTHROUGH mode and PRIVATE Mode + 802.1Qbh both require
         * exclusive access to a device, so current connections count
         * must be 0 in those cases.
         */
        if ((dev->connections > 0) &&
            ((netdef->forwardType == VIR_NETWORK_FORWARD_PASSTHROUGH) ||
             ((netdef->forwardType == VIR_NETWORK_FORWARD_PRIVATE) &&
              iface->data.network.actual->virtPortProfile &&
              (iface->data.network.actual->virtPortProfile->virtPortType
               == VIR_NETDEV_VPORT_PROFILE_8021QBH)))) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("network '%s' claims dev='%s' is already in "
                             "use by a different domain"),
                           netdef->name, actualDev);
            goto error;
        }

        /* we are now assured of success, so mark the allocation */
        dev->connections++;
        VIR_DEBUG("Using physical device %s, connections %d",
                  dev->device.dev, dev->connections);

    }  else /* if (actualType == VIR_DOMAIN_NET_TYPE_HOSTDEV) */ {
        virDomainHostdevDefPtr hostdev;

        hostdev = virDomainNetGetActualHostdev(iface);
        if (!hostdev) {
            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                           _("the interface uses a hostdev mode, "
                             "but has no hostdev"));
            goto error;
        }

        /* find the matching interface and increment its connections */
        for (ii = 0; ii < netdef->nForwardIfs; ii++) {
            if (netdef->forwardIfs[ii].type
                == VIR_NETWORK_FORWARD_HOSTDEV_DEVICE_PCI &&
                virDevicePCIAddressEqual(&hostdev->source.subsys.u.pci,
                                         &netdef->forwardIfs[ii].device.pci)) {
                dev = &netdef->forwardIfs[ii];
                break;
            }
        }
        /* dev points at the physical device we want to use */
        if (!dev) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("network '%s' doesn't have "
                             "PCI device %04x:%02x:%02x.%x in use by domain"),
                           netdef->name,
                           hostdev->source.subsys.u.pci.domain,
                           hostdev->source.subsys.u.pci.bus,
                           hostdev->source.subsys.u.pci.slot,
                           hostdev->source.subsys.u.pci.function);
            goto error;
        }

        /* hostdev (PCI passthrough) requires exclusive access to a
         * device, so the current connections count must be 0.
         */
        if ((dev->connections > 0) &&
            netdef->forwardType == VIR_NETWORK_FORWARD_HOSTDEV) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("network '%s' claims the PCI device at "
                             "domain=%d bus=%d slot=%d function=%d "
                             "is already in use by a different domain"),
                           netdef->name,
                           dev->device.pci.domain, dev->device.pci.bus,
                           dev->device.pci.slot, dev->device.pci.function);
            goto error;
        }

        /* we are now assured of success, so mark the allocation */
        dev->connections++;
        VIR_DEBUG("Using physical device %04x:%02x:%02x.%x, connections %d",
                  dev->device.pci.domain, dev->device.pci.bus,
                  dev->device.pci.slot, dev->device.pci.function,
                  dev->connections);
    }

success:
    /* the network itself is counted even if no device was claimed */
    netdef->connections++;
    VIR_DEBUG("Using network %s, %d connections",
              netdef->name, netdef->connections);
    ret = 0;
cleanup:
    if (network)
        virNetworkObjUnlock(network);
    return ret;

error:
    goto cleanup;
}


/* networkReleaseActualDevice:
 * @iface:  a domain's NetDef (interface definition)
 *
 * Given a domain <interface> element that previously had its <actual>
 * element filled in (and possibly a physical device allocated to it),
 * free up the physical device for use by someone else, and free the
 * virDomainActualNetDef.
 *
 * Interfaces that are not of type network are ignored. For all other
 * interfaces iface->data.network.actual is freed and reset to NULL
 * whether or not releasing the device succeeded.
 *
 * Returns 0 on success, -1 on failure.
 */
int
networkReleaseActualDevice(virDomainNetDefPtr iface)
{
    struct network_driver *driver = driverState;
    enum virDomainNetType actualType = virDomainNetGetActualType(iface);
    virNetworkObjPtr network;
    virNetworkDefPtr netdef;
    virNetworkForwardIfDefPtr dev = NULL;
    int ii, ret = -1;

    if (iface->type != VIR_DOMAIN_NET_TYPE_NETWORK)
        return 0;

    networkDriverLock(driver);
    network = virNetworkFindByName(&driver->networks, iface->data.network.name);
    networkDriverUnlock(driver);
    if (!network) {
        virReportError(VIR_ERR_NO_NETWORK,
                       _("no network with matching name '%s'"),
                       iface->data.network.name);
        goto error;
    }
    netdef = network->def;

    /* only direct and hostdev connections occupy a forward interface */
    if ((!iface->data.network.actual) ||
        ((actualType != VIR_DOMAIN_NET_TYPE_DIRECT) &&
         (actualType != VIR_DOMAIN_NET_TYPE_HOSTDEV))) {
        VIR_DEBUG("Nothing to release to network %s", iface->data.network.name);
        goto success;
    }

    if (netdef->nForwardIfs == 0) {
        virReportError(VIR_ERR_INTERNAL_ERROR,
                       _("network '%s' uses a direct/hostdev mode, but "
                         "has no forward dev and no interface pool"),
                       netdef->name);
        goto error;
    }

    if (actualType == VIR_DOMAIN_NET_TYPE_DIRECT) {
        const char *actualDev;

        actualDev = virDomainNetGetActualDirectDev(iface);
        if (!actualDev) {
            virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
                           _("the interface uses a direct mode, "
                             "but has no source dev"));
            goto error;
        }

        /* find the forward interface with the same netdev name */
        for (ii = 0; ii < netdef->nForwardIfs; ii++) {
            if (netdef->forwardIfs[ii].type
                == VIR_NETWORK_FORWARD_HOSTDEV_DEVICE_NETDEV &&
                STREQ(actualDev, netdef->forwardIfs[ii].device.dev)) {
                dev = &netdef->forwardIfs[ii];
                break;
            }
        }

        if (!dev) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("network '%s' doesn't have dev='%s' "
                             "in use by domain"),
                           netdef->name, actualDev);
            goto error;
        }

        dev->connections--;
        VIR_DEBUG("Releasing physical device %s, connections %d",
                  dev->device.dev, dev->connections);

    } else /* if (actualType == VIR_DOMAIN_NET_TYPE_HOSTDEV) */ {
        virDomainHostdevDefPtr hostdev;

        hostdev = virDomainNetGetActualHostdev(iface);
        if (!hostdev) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           "%s", _("the interface uses a hostdev mode, but has no hostdev"));
            goto error;
        }

        /* find the forward interface with the same PCI address */
        for (ii = 0; ii < netdef->nForwardIfs; ii++) {
            if (netdef->forwardIfs[ii].type
                == VIR_NETWORK_FORWARD_HOSTDEV_DEVICE_PCI &&
                virDevicePCIAddressEqual(&hostdev->source.subsys.u.pci,
                                         &netdef->forwardIfs[ii].device.pci)) {
                dev = &netdef->forwardIfs[ii];
                break;
            }
        }

        if (!dev) {
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("network '%s' doesn't have "
                             "PCI device %04x:%02x:%02x.%x in use by domain"),
                           netdef->name,
                           hostdev->source.subsys.u.pci.domain,
                           hostdev->source.subsys.u.pci.bus,
                           hostdev->source.subsys.u.pci.slot,
                           hostdev->source.subsys.u.pci.function);
            goto error;
        }

        dev->connections--;
        VIR_DEBUG("Releasing physical device %04x:%02x:%02x.%x, connections %d",
                  dev->device.pci.domain, dev->device.pci.bus,
                  dev->device.pci.slot, dev->device.pci.function,
                  dev->connections);
    }

success:
    /* the network itself was counted even if no device was claimed */
    netdef->connections--;
    VIR_DEBUG("Releasing network %s, %d connections",
              netdef->name, netdef->connections);
    ret = 0;
cleanup:
    if (network)
        virNetworkObjUnlock(network);
    /* the actual def is dropped on success and on failure alike; the
     * union member data.network is only valid for type network */
    if (iface->type == VIR_DOMAIN_NET_TYPE_NETWORK) {
        virDomainActualNetDefFree(iface->data.network.actual);
        iface->data.network.actual = NULL;
    }
    return ret;

error:
    goto cleanup;
}
4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108

/*
 * networkGetNetworkAddress:
 * @netname: the name of a network
 * @netaddr: string representation of IP address for that network.
 *
 * Attempt to return an IP (v4) address associated with the named
 * network. If a libvirt virtual network, that will be provided in the
 * configuration. For host bridge and direct (macvtap) networks, we
 * must do an ioctl to learn the address.
 *
 * Note: This function returns the 1st IPv4 address it finds. It might
 * be useful if it was more flexible, but the current use (getting a
 * listen address for qemu's vnc/spice graphics server) can only use a
 * single address anyway.
 *
 * Returns 0 on success, and puts a string (which must be free'd by
 * the caller) into *netaddr. Returns -1 on failure or -2 if
 * completely unsupported.
 */
int
networkGetNetworkAddress(const char *netname, char **netaddr)
{
    int ret = -1;
    struct network_driver *driver = driverState;
4109
    virNetworkObjPtr network;
4110 4111 4112 4113
    virNetworkDefPtr netdef;
    virNetworkIpDefPtr ipdef;
    virSocketAddr addr;
    virSocketAddrPtr addrptr = NULL;
4114
    char *dev_name = NULL;
4115 4116 4117 4118 4119 4120

    *netaddr = NULL;
    networkDriverLock(driver);
    network = virNetworkFindByName(&driver->networks, netname);
    networkDriverUnlock(driver);
    if (!network) {
4121 4122 4123
        virReportError(VIR_ERR_NO_NETWORK,
                       _("no network with matching name '%s'"),
                       netname);
4124
        goto error;
4125 4126 4127 4128 4129 4130 4131 4132 4133 4134
    }
    netdef = network->def;

    switch (netdef->forwardType) {
    case VIR_NETWORK_FORWARD_NONE:
    case VIR_NETWORK_FORWARD_NAT:
    case VIR_NETWORK_FORWARD_ROUTE:
        /* if there's an ipv4def, get it's address */
        ipdef = virNetworkDefGetIpByIndex(netdef, AF_INET, 0);
        if (!ipdef) {
4135 4136 4137
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("network '%s' doesn't have an IPv4 address"),
                           netdef->name);
4138 4139 4140 4141 4142 4143
            break;
        }
        addrptr = &ipdef->address;
        break;

    case VIR_NETWORK_FORWARD_BRIDGE:
4144
        if ((dev_name = netdef->bridge))
4145 4146 4147 4148 4149 4150 4151 4152 4153
            break;
        /*
         * fall through if netdef->bridge wasn't set, since this is
         * also a direct-mode interface.
         */
    case VIR_NETWORK_FORWARD_PRIVATE:
    case VIR_NETWORK_FORWARD_VEPA:
    case VIR_NETWORK_FORWARD_PASSTHROUGH:
        if ((netdef->nForwardIfs > 0) && netdef->forwardIfs)
4154
            dev_name = netdef->forwardIfs[0].device.dev;
4155

4156
        if (!dev_name) {
4157 4158 4159
            virReportError(VIR_ERR_INTERNAL_ERROR,
                           _("network '%s' has no associated interface or bridge"),
                           netdef->name);
4160 4161 4162 4163
        }
        break;
    }

4164
    if (dev_name) {
4165
        if (virNetDevGetIPv4Address(dev_name, &addr) < 0)
4166
            goto error;
4167
        addrptr = &addr;
4168 4169
    }

4170 4171 4172
    if (!(addrptr &&
          (*netaddr = virSocketAddrFormat(addrptr)))) {
        goto error;
4173 4174
    }

4175
    ret = 0;
4176 4177 4178 4179
cleanup:
    if (network)
        virNetworkObjUnlock(network);
    return ret;
4180 4181 4182

error:
    goto cleanup;
4183
}