/*
 * Copyright (C) 2010-2015 Red Hat, Inc.
 * Copyright (C) 2010-2012 IBM Corporation
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library.  If not, see
 * <http://www.gnu.org/licenses/>.
 *
 * Authors:
 *     Stefan Berger <stefanb@us.ibm.com>
 *
 * Notes:
 * netlink: http://lovezutto.googlepages.com/netlink.pdf
 *          iproute2 package
 *
 */

#include <config.h>

#include "virnetdevmacvlan.h"
31
#include "virmacaddr.h"
32
#include "virerror.h"
33
#include "virthread.h"
34
#include "virstring.h"
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51

#define VIR_FROM_THIS VIR_FROM_NET

VIR_ENUM_IMPL(virNetDevMacVLanMode, VIR_NETDEV_MACVLAN_MODE_LAST,
              "vepa",
              "private",
              "bridge",
              "passthrough")

#if WITH_MACVTAP
# include <stdint.h>
# include <stdio.h>
# include <errno.h>
# include <fcntl.h>
# include <sys/socket.h>
# include <sys/ioctl.h>

52
# include <net/if.h>
53 54 55 56 57 58 59
# include <linux/if_tun.h>

/* Older kernels lacked this enum value.  */
# if !HAVE_DECL_MACVLAN_MODE_PASSTHRU
#  define MACVLAN_MODE_PASSTHRU 8
# endif

60
# include "viralloc.h"
61
# include "virlog.h"
62
# include "viruuid.h"
63
# include "virfile.h"
64
# include "virnetlink.h"
65
# include "virnetdev.h"
66 67
# include "virpidfile.h"

68
VIR_LOG_INIT("util.netdevmacvlan");
69 70 71 72

# define MACVTAP_NAME_PREFIX	"macvtap"
# define MACVTAP_NAME_PATTERN	"macvtap%d"

73 74 75
# define MACVLAN_NAME_PREFIX	"macvlan"
# define MACVLAN_NAME_PATTERN	"macvlan%d"

76
virMutex virNetDevMacVLanCreateMutex = VIR_MUTEX_INITIALIZER;
77

78 79 80 81
/**
 * virNetDevMacVLanCreate:
 *
 * @ifname: The name the interface is supposed to have; optional parameter
82
 * @type: The type of device, i.e., "macvtap", "macvlan"
83 84 85 86 87 88 89 90 91 92 93 94 95 96
 * @macaddress: The MAC address of the device
 * @srcdev: The name of the 'link' device
 * @macvlan_mode: The macvlan mode to use
 * @retry: Pointer to integer that will be '1' upon return if an interface
 *         with the same name already exists and it is worth to try
 *         again with a different name
 *
 * Create a macvtap device with the given properties.
 *
 * Returns 0 on success, -1 on fatal error.
 */
int
virNetDevMacVLanCreate(const char *ifname,
                       const char *type,
97
                       const virMacAddr *macaddress,
98 99 100 101
                       const char *srcdev,
                       uint32_t macvlan_mode,
                       int *retry)
{
102
    int rc = -1;
103
    struct nlmsghdr *resp = NULL;
104 105 106 107 108 109
    struct nlmsgerr *err;
    struct ifinfomsg ifinfo = { .ifi_family = AF_UNSPEC };
    int ifindex;
    unsigned int recvbuflen;
    struct nl_msg *nl_msg;
    struct nlattr *linkinfo, *info_data;
110
    char macstr[VIR_MAC_STRING_BUFLEN];
111

112
    if (virNetDevGetIndex(srcdev, &ifindex) < 0)
113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
        return -1;

    *retry = 0;

    nl_msg = nlmsg_alloc_simple(RTM_NEWLINK,
                                NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL);
    if (!nl_msg) {
        virReportOOMError();
        return -1;
    }

    if (nlmsg_append(nl_msg,  &ifinfo, sizeof(ifinfo), NLMSG_ALIGNTO) < 0)
        goto buffer_too_small;

    if (nla_put_u32(nl_msg, IFLA_LINK, ifindex) < 0)
        goto buffer_too_small;

    if (nla_put(nl_msg, IFLA_ADDRESS, VIR_MAC_BUFLEN, macaddress) < 0)
        goto buffer_too_small;

    if (ifname &&
        nla_put(nl_msg, IFLA_IFNAME, strlen(ifname)+1, ifname) < 0)
        goto buffer_too_small;

    if (!(linkinfo = nla_nest_start(nl_msg, IFLA_LINKINFO)))
        goto buffer_too_small;

    if (nla_put(nl_msg, IFLA_INFO_KIND, strlen(type), type) < 0)
        goto buffer_too_small;

    if (macvlan_mode > 0) {
        if (!(info_data = nla_nest_start(nl_msg, IFLA_INFO_DATA)))
            goto buffer_too_small;

        if (nla_put(nl_msg, IFLA_MACVLAN_MODE, sizeof(macvlan_mode),
                    &macvlan_mode) < 0)
            goto buffer_too_small;

        nla_nest_end(nl_msg, info_data);
    }

    nla_nest_end(nl_msg, linkinfo);

156
    if (virNetlinkCommand(nl_msg, &resp, &recvbuflen, 0, 0,
157
                          NETLINK_ROUTE, 0) < 0) {
158 159 160
        goto cleanup;
    }

161
    if (recvbuflen < NLMSG_LENGTH(0) || resp == NULL)
162 163 164 165 166 167 168 169 170 171 172 173 174 175 176
        goto malformed_resp;

    switch (resp->nlmsg_type) {
    case NLMSG_ERROR:
        err = (struct nlmsgerr *)NLMSG_DATA(resp);
        if (resp->nlmsg_len < NLMSG_LENGTH(sizeof(*err)))
            goto malformed_resp;

        switch (err->error) {

        case 0:
            break;

        case -EEXIST:
            *retry = 1;
177
            goto cleanup;
178 179 180

        default:
            virReportSystemError(-err->error,
181 182 183
                                 _("error creating %s interface %s@%s (%s)"),
                                 type, ifname, srcdev,
                                 virMacAddrFormat(macaddress, macstr));
184
            goto cleanup;
185 186 187 188 189 190 191 192 193 194
        }
        break;

    case NLMSG_DONE:
        break;

    default:
        goto malformed_resp;
    }

195
    rc = 0;
196
 cleanup:
197
    nlmsg_free(nl_msg);
198
    VIR_FREE(resp);
199 200
    return rc;

201
 malformed_resp:
202
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
203
                   _("malformed netlink response message"));
204
    goto cleanup;
205

206
 buffer_too_small:
207
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
208
                   _("allocated netlink buffer is too small"));
209
    goto cleanup;
210 211 212 213 214 215 216 217 218 219 220 221 222
}

/**
 * virNetDevMacVLanDelete:
 *
 * @ifname: Name of the interface
 *
 * Tear down the interface with the given name by delegating to
 * virNetlinkDelLink().
 *
 * Returns the result of virNetlinkDelLink(): 0 on success, -1 on
 * fatal error.
 */
int
virNetDevMacVLanDelete(const char *ifname)
{
    int rc = virNetlinkDelLink(ifname, NULL);

    return rc;
}


227 228 229
/**
 * virNetDevMacVLanTapOpen:
 * @ifname: Name of the macvtap interface
 * @tapfd: array receiving the file descriptors of the opened tap device
 * @tapfdSize: number of file descriptors to store in @tapfd
 * @retries: Number of additional open attempts allowed when opening
 *           fails (e.g. udev may still need to create the tap chardev);
 *           the budget is shared by all descriptors
 *
 * Open /dev/tap<ifindex> for the given macvtap interface @tapfdSize times,
 * sleeping 20ms between failed attempts while retries remain.
 *
 * Returns 0 on success, -1 otherwise. On failure every descriptor that
 * was already opened by this call is closed again.
 */
static int
virNetDevMacVLanTapOpen(const char *ifname,
                        int *tapfd,
                        size_t tapfdSize,
                        int retries)
{
    char *devpath = NULL;
    size_t nopen = 0;
    int ifindex;
    int ret = -1;

    if (virNetDevGetIndex(ifname, &ifindex) < 0)
        return -1;

    if (virAsprintf(&devpath, "/dev/tap%d", ifindex) < 0)
        goto cleanup;

    for (nopen = 0; nopen < tapfdSize; nopen++) {
        int fd;

        for (;;) {
            fd = open(devpath, O_RDWR);
            if (fd >= 0)
                break;

            if (retries-- <= 0) {
                /* Out of retries: give up and report the open() error. */
                virReportSystemError(errno,
                                     _("cannot open macvtap tap device %s"),
                                     devpath);
                goto cleanup;
            }

            /* may need to wait for udev to be done */
            usleep(20000);
        }

        tapfd[nopen] = fd;
    }

    ret = 0;

 cleanup:
    if (ret < 0) {
        /* Undo the descriptors opened so far, newest first. */
        while (nopen--)
            VIR_FORCE_CLOSE(tapfd[nopen]);
    }
    VIR_FREE(devpath);
    return ret;
}


/**
 * virNetDevMacVLanTapSetup:
289 290 291
 * @tapfd: array of file descriptors of the macvtap tap
 * @tapfdSize: number of file descriptors in @tapfd
 * @vnet_hdr: whether to enable or disable IFF_VNET_HDR
292 293 294 295 296 297
 * @multiqueue: whether to enable or disable IFF_MULTI_QUEUE
 *
 * Turn on the IFF_VNET_HDR flag if requested and available, but make sure it's
 * off otherwise. Similarly, turn on IFF_MULTI_QUEUE if requested, but if it
 * can't be set, consider it a fatal error (rather than ignoring as with
 * @vnet_hdr).
298 299 300 301 302
 *
 * A fatal error is defined as the VNET_HDR flag being set but it cannot
 * be turned off for some reason. This is reported with -1. Other fatal
 * error is not being able to read the interface flags. In that case the
 * macvtap device should not be used.
303
 *
304
 * Returns 0 on success, -1 in case of fatal error.
305 306
 */
static int
307
virNetDevMacVLanTapSetup(int *tapfd, size_t tapfdSize, bool vnet_hdr, bool multiqueue)
308 309 310 311
{
    unsigned int features;
    struct ifreq ifreq;
    short new_flags = 0;
312
    size_t i;
313

314 315
    for (i = 0; i < tapfdSize; i++) {
        memset(&ifreq, 0, sizeof(ifreq));
316

317
        if (ioctl(tapfd[i], TUNGETIFF, &ifreq) < 0) {
318
            virReportSystemError(errno, "%s",
319
                                 _("cannot get interface flags on macvtap tap"));
320 321
            return -1;
        }
322 323 324

        new_flags = ifreq.ifr_flags;

325
        if (vnet_hdr) {
326 327 328 329 330
            if (ioctl(tapfd[i], TUNGETFEATURES, &features) < 0) {
                virReportSystemError(errno, "%s",
                                     _("cannot get feature flags on macvtap tap"));
                return -1;
            }
331 332 333 334
            if (features & IFF_VNET_HDR)
                new_flags |= IFF_VNET_HDR;
        } else {
            new_flags &= ~IFF_VNET_HDR;
335 336
        }

337 338 339 340 341
        if (multiqueue)
            new_flags |= IFF_MULTI_QUEUE;
        else
            new_flags &= ~IFF_MULTI_QUEUE;

342 343 344
        if (new_flags != ifreq.ifr_flags) {
            ifreq.ifr_flags = new_flags;
            if (ioctl(tapfd[i], TUNSETIFF, &ifreq) < 0) {
345 346 347
                virReportSystemError(errno, "%s",
                                     _("unable to set vnet or multiqueue flags on macvtap"));
                return -1;
348
            }
349 350 351 352 353 354 355 356 357 358 359 360 361 362
        }
    }

    return 0;
}


/* Lookup table indexed by virNetDevMacVLanMode that yields the matching
 * MACVLAN_MODE_* constant. The looked-up value is what
 * virNetDevMacVLanCreateWithVPortProfile() hands to
 * virNetDevMacVLanCreate(), which sends it as IFLA_MACVLAN_MODE. */
static const uint32_t modeMap[VIR_NETDEV_MACVLAN_MODE_LAST] = {
    [VIR_NETDEV_MACVLAN_MODE_VEPA] = MACVLAN_MODE_VEPA,
    [VIR_NETDEV_MACVLAN_MODE_PRIVATE] = MACVLAN_MODE_PRIVATE,
    [VIR_NETDEV_MACVLAN_MODE_BRIDGE] = MACVLAN_MODE_BRIDGE,
    [VIR_NETDEV_MACVLAN_MODE_PASSTHRU] = MACVLAN_MODE_PASSTHRU,
};

363 364 365 366
/* Struct to hold the state and configuration of a 802.1qbg port */
struct virNetlinkCallbackData {
    char *cr_ifname;
    virNetDevVPortProfilePtr virtPortProfile;
367
    virMacAddr macaddress;
368
    char *linkdev;
369
    int vf;
S
Stefan Berger 已提交
370
    unsigned char vmuuid[VIR_UUID_BUFLEN];
371
    virNetDevVPortProfileOp vmOp;
372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397
    unsigned int linkState;
};

typedef struct virNetlinkCallbackData *virNetlinkCallbackDataPtr;

/* Length of a formatted instance UUID, not counting the trailing NUL */
# define INSTANCE_STRLEN 36

/**
 * instance2str:
 * @p: the 16 raw bytes of the instance UUID
 * @dst: buffer receiving the formatted, NUL-terminated string
 * @size: size of @dst in bytes; must be larger than INSTANCE_STRLEN
 *
 * Format the raw UUID as lowercase hex in the grouping
 * xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx.
 *
 * Returns 0 on success, -1 if @p or @dst is NULL or @dst is too small
 * (in which case @dst is left untouched).
 */
static int instance2str(const unsigned char *p, char *dst, size_t size)
{
    if (!p || !dst || size <= INSTANCE_STRLEN)
        return -1;

    snprintf(dst, size, "%02x%02x%02x%02x-%02x%02x-%02x%02x-"
             "%02x%02x-%02x%02x%02x%02x%02x%02x",
             p[0], p[1], p[2], p[3],
             p[4], p[5], p[6], p[7],
             p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]);
    return 0;
}

# define LLDPAD_PID_FILE  "/var/run/lldpad.pid"
# define VIRIP_PID_FILE   "/var/run/virip.pid"

/**
 * virNetDevMacVLanVPortProfileCallback:
 *
398
 * @hdr: The buffer containing the received netlink header + payload
399 400 401 402 403 404 405 406 407 408 409
 * @length: The length of the received netlink message.
 * @peer: The netling sockaddr containing the peer information
 * @handled: Contains information if the message has been replied to yet
 * @opaque: Contains vital information regarding the associated vm an interface
 *
 * This function is called when a netlink message is received. The function
 * reads the message and responds if it is pertinent to the running VMs
 * network interface.
 */

static void
410 411
virNetDevMacVLanVPortProfileCallback(struct nlmsghdr *hdr,
                                     unsigned int length,
412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437
                                     struct sockaddr_nl *peer,
                                     bool *handled,
                                     void *opaque)
{
   struct nla_policy ifla_vf_policy[IFLA_VF_MAX + 1] = {
       [IFLA_VF_MAC] = {.minlen = sizeof(struct ifla_vf_mac),
                        .maxlen = sizeof(struct ifla_vf_mac)},
       [IFLA_VF_VLAN] = {.minlen = sizeof(struct ifla_vf_vlan),
                         .maxlen = sizeof(struct ifla_vf_vlan)},
    };

    struct nla_policy ifla_port_policy[IFLA_PORT_MAX + 1] = {
        [IFLA_PORT_RESPONSE] = {.type = NLA_U16},
    };

    struct nlattr *tb[IFLA_MAX + 1], *tb3[IFLA_PORT_MAX + 1],
        *tb_vfinfo[IFLA_VF_MAX + 1], *tb_vfinfo_list;

    struct ifinfomsg ifinfo;
    void *data;
    int rem;
    char *ifname;
    bool indicate = false;
    virNetlinkCallbackDataPtr calld = opaque;
    pid_t lldpad_pid = 0;
    pid_t virip_pid = 0;
438
    char macaddr[VIR_MAC_STRING_BUFLEN];
439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470

    data = nlmsg_data(hdr);

    /* Quickly decide if we want this or not */

    if (virPidFileReadPath(LLDPAD_PID_FILE, &lldpad_pid) < 0)
        return;

    ignore_value(virPidFileReadPath(VIRIP_PID_FILE, &virip_pid));

    if (hdr->nlmsg_pid != lldpad_pid && hdr->nlmsg_pid != virip_pid)
        return; /* we only care for lldpad and virip messages */
    if (hdr->nlmsg_type != RTM_SETLINK)
        return; /* we only care for RTM_SETLINK */
    if (*handled)
        return; /* if it has been handled - dont handle again */

    /* DEBUG start */
    VIR_INFO("netlink message nl_sockaddr: %p len: %d", peer, length);
    VIR_DEBUG("nlmsg_type  = 0x%02x", hdr->nlmsg_type);
    VIR_DEBUG("nlmsg_len   = 0x%04x", hdr->nlmsg_len);
    VIR_DEBUG("nlmsg_pid   = %d", hdr->nlmsg_pid);
    VIR_DEBUG("nlmsg_seq   = 0x%08x", hdr->nlmsg_seq);
    VIR_DEBUG("nlmsg_flags = 0x%04x", hdr->nlmsg_flags);

    VIR_DEBUG("lldpad pid  = %d", lldpad_pid);

    switch (hdr->nlmsg_type) {
    case RTM_NEWLINK:
    case RTM_DELLINK:
    case RTM_SETLINK:
    case RTM_GETLINK:
J
Jiri Denemark 已提交
471 472
        VIR_DEBUG(" IFINFOMSG");
        VIR_DEBUG("        ifi_family = 0x%02x",
473
            ((struct ifinfomsg *)data)->ifi_family);
J
Jiri Denemark 已提交
474
        VIR_DEBUG("        ifi_type   = 0x%x",
475
            ((struct ifinfomsg *)data)->ifi_type);
J
Jiri Denemark 已提交
476
        VIR_DEBUG("        ifi_index  = %i",
477
            ((struct ifinfomsg *)data)->ifi_index);
J
Jiri Denemark 已提交
478
        VIR_DEBUG("        ifi_flags  = 0x%04x",
479
            ((struct ifinfomsg *)data)->ifi_flags);
J
Jiri Denemark 已提交
480
        VIR_DEBUG("        ifi_change = 0x%04x",
481 482 483 484 485
            ((struct ifinfomsg *)data)->ifi_change);
    }
    /* DEBUG end */

    /* Parse netlink message assume a setlink with vfports */
486
    memcpy(&ifinfo, NLMSG_DATA(hdr), sizeof(ifinfo));
487 488 489
    VIR_DEBUG("family:%#x type:%#x index:%d flags:%#x change:%#x",
        ifinfo.ifi_family, ifinfo.ifi_type, ifinfo.ifi_index,
        ifinfo.ifi_flags, ifinfo.ifi_change);
490
    if (nlmsg_parse(hdr, sizeof(ifinfo),
491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519
        (struct nlattr **)&tb, IFLA_MAX, NULL)) {
        VIR_DEBUG("error parsing request...");
        return;
    }

    if (tb[IFLA_VFINFO_LIST]) {
        VIR_DEBUG("FOUND IFLA_VFINFO_LIST!");

        nla_for_each_nested(tb_vfinfo_list, tb[IFLA_VFINFO_LIST], rem) {
            if (nla_type(tb_vfinfo_list) != IFLA_VF_INFO) {
                VIR_DEBUG("nested parsing of"
                    "IFLA_VFINFO_LIST failed.");
                return;
            }
            if (nla_parse_nested(tb_vfinfo, IFLA_VF_MAX,
                tb_vfinfo_list, ifla_vf_policy)) {
                VIR_DEBUG("nested parsing of "
                    "IFLA_VF_INFO failed.");
                return;
            }
        }

        if (tb_vfinfo[IFLA_VF_MAC]) {
            struct ifla_vf_mac *mac = RTA_DATA(tb_vfinfo[IFLA_VF_MAC]);
            unsigned char *m = mac->mac;

            VIR_DEBUG("IFLA_VF_MAC = %2x:%2x:%2x:%2x:%2x:%2x",
                      m[0], m[1], m[2], m[3], m[4], m[5]);

E
Eric Blake 已提交
520
            if (virMacAddrCmpRaw(&calld->macaddress, mac->mac)) {
521
                /* Repeat the same check for a broadcast mac */
522
                size_t i;
523

524 525
                for (i = 0; i < VIR_MAC_BUFLEN; i++) {
                    if (calld->macaddress.addr[i] != 0xff) {
526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541
                        VIR_DEBUG("MAC address match failed (wasn't broadcast)");
                        return;
                    }
                }
            }
        }

        if (tb_vfinfo[IFLA_VF_VLAN]) {
            struct ifla_vf_vlan *vlan = RTA_DATA(tb_vfinfo[IFLA_VF_VLAN]);

            VIR_DEBUG("IFLA_VF_VLAN = %d", vlan->vlan);
        }
    }

    if (tb[IFLA_IFNAME]) {
        ifname = (char *)RTA_DATA(tb[IFLA_IFNAME]);
J
Jiri Denemark 已提交
542
        VIR_DEBUG("IFLA_IFNAME = %s", ifname);
543 544 545 546
    }

    if (tb[IFLA_OPERSTATE]) {
        rem = *(unsigned short *)RTA_DATA(tb[IFLA_OPERSTATE]);
J
Jiri Denemark 已提交
547
        VIR_DEBUG("IFLA_OPERSTATE = %d", rem);
548 549 550 551 552
    }

    if (tb[IFLA_VF_PORTS]) {
        struct nlattr *tb_vf_ports;

J
Jiri Denemark 已提交
553
        VIR_DEBUG("found IFLA_VF_PORTS");
554 555
        nla_for_each_nested(tb_vf_ports, tb[IFLA_VF_PORTS], rem) {

J
Jiri Denemark 已提交
556
            VIR_DEBUG("iterating");
557
            if (nla_type(tb_vf_ports) != IFLA_VF_PORT) {
J
Jiri Denemark 已提交
558
                VIR_DEBUG("not a IFLA_VF_PORT. skipping");
559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597
                continue;
            }
            if (nla_parse_nested(tb3, IFLA_PORT_MAX, tb_vf_ports,
                ifla_port_policy)) {
                VIR_DEBUG("nested parsing on level 2"
                          " failed.");
            }
            if (tb3[IFLA_PORT_VF]) {
                VIR_DEBUG("IFLA_PORT_VF = %d",
                          *(uint32_t *) (RTA_DATA(tb3[IFLA_PORT_VF])));
            }
            if (tb3[IFLA_PORT_PROFILE]) {
                VIR_DEBUG("IFLA_PORT_PROFILE = %s",
                          (char *) RTA_DATA(tb3[IFLA_PORT_PROFILE]));
            }

            if (tb3[IFLA_PORT_VSI_TYPE]) {
                struct ifla_port_vsi *pvsi;
                int tid = 0;

                pvsi = (struct ifla_port_vsi *)
                    RTA_DATA(tb3[IFLA_PORT_VSI_TYPE]);
                tid = ((pvsi->vsi_type_id[2] << 16) |
                       (pvsi->vsi_type_id[1] << 8) |
                       pvsi->vsi_type_id[0]);

                VIR_DEBUG("mgr_id: %d", pvsi->vsi_mgr_id);
                VIR_DEBUG("type_id: %d", tid);
                VIR_DEBUG("type_version: %d",
                          pvsi->vsi_type_version);
            }

            if (tb3[IFLA_PORT_INSTANCE_UUID]) {
                char instance[INSTANCE_STRLEN + 2];
                unsigned char *uuid;

                uuid = (unsigned char *)
                    RTA_DATA(tb3[IFLA_PORT_INSTANCE_UUID]);
                instance2str(uuid, instance, sizeof(instance));
J
Jiri Denemark 已提交
598
                VIR_DEBUG("IFLA_PORT_INSTANCE_UUID = %s",
599 600 601 602 603 604 605 606 607 608 609 610 611 612
                          instance);
            }

            if (tb3[IFLA_PORT_REQUEST]) {
                uint8_t req = *(uint8_t *) RTA_DATA(tb3[IFLA_PORT_REQUEST]);
                VIR_DEBUG("IFLA_PORT_REQUEST = %d", req);

                if (req == PORT_REQUEST_DISASSOCIATE) {
                    VIR_DEBUG("Set dissaccociated.");
                    indicate = true;
                }
            }

            if (tb3[IFLA_PORT_RESPONSE]) {
J
Jiri Denemark 已提交
613
                VIR_DEBUG("IFLA_PORT_RESPONSE = %d", *(uint16_t *)
614 615 616 617 618
                    RTA_DATA(tb3[IFLA_PORT_RESPONSE]));
            }
        }
    }

619
    if (!indicate)
620 621 622 623 624
        return;

    VIR_INFO("Re-send 802.1qbg associate request:");
    VIR_INFO("  if: %s", calld->cr_ifname);
    VIR_INFO("  lf: %s", calld->linkdev);
625
    VIR_INFO(" mac: %s", virMacAddrFormat(&calld->macaddress, macaddr));
626 627
    ignore_value(virNetDevVPortProfileAssociate(calld->cr_ifname,
                                                calld->virtPortProfile,
628
                                                &calld->macaddress,
629
                                                calld->linkdev,
630
                                                calld->vf,
631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670
                                                calld->vmuuid,
                                                calld->vmOp, true));
    *handled = true;
    return;
}

/**
 * virNetlinkCallbackDataFree
 *
 * @calld: pointer to a virNetlinkCallbackData object to free
 *
 * Release the interface name, the port profile copy and the link device
 * name held by @calld, then @calld itself. Passing NULL is a no-op.
 *
 * Returns nothing.
 */
static void
virNetlinkCallbackDataFree(virNetlinkCallbackDataPtr calld)
{
    if (!calld)
        return;

    VIR_FREE(calld->cr_ifname);
    VIR_FREE(calld->virtPortProfile);
    VIR_FREE(calld->linkdev);
    VIR_FREE(calld);
}

/**
 * virNetDevMacVLanVPortProfileDestroyCallback:
 *
 * @watch: watch whose handle to remove (unused)
 * @macaddr: macaddr whose handle to remove (unused)
 * @opaque: the virNetlinkCallbackData registered with the handler
 *
 * This function is called when a netlink message handler is terminated.
 * It releases the callback data referenced by @opaque, including the
 * object itself.
 */
static void
virNetDevMacVLanVPortProfileDestroyCallback(int watch ATTRIBUTE_UNUSED,
                                            const virMacAddr *macaddr ATTRIBUTE_UNUSED,
                                            void *opaque)
{
    virNetlinkCallbackDataPtr calld = opaque;

    virNetlinkCallbackDataFree(calld);
}

677
int
678
virNetDevMacVLanVPortProfileRegisterCallback(const char *ifname,
679
                                             const virMacAddr *macaddress,
680 681 682
                                             const char *linkdev,
                                             const unsigned char *vmuuid,
                                             virNetDevVPortProfilePtr virtPortProfile,
683
                                             virNetDevVPortProfileOp vmOp)
684 685 686
{
    virNetlinkCallbackDataPtr calld = NULL;

687
    if (virtPortProfile && virNetlinkEventServiceIsRunning(NETLINK_ROUTE)) {
688
        if (VIR_ALLOC(calld) < 0)
689
            goto error;
690 691
        if (VIR_STRDUP(calld->cr_ifname, ifname) < 0)
            goto error;
692
        if (VIR_ALLOC(calld->virtPortProfile) < 0)
693
            goto error;
694
        memcpy(calld->virtPortProfile, virtPortProfile, sizeof(*virtPortProfile));
695
        virMacAddrSet(&calld->macaddress, macaddress);
696 697
        if (VIR_STRDUP(calld->linkdev, linkdev) < 0)
            goto error;
S
Stefan Berger 已提交
698
        memcpy(calld->vmuuid, vmuuid, sizeof(calld->vmuuid));
699 700 701 702 703

        calld->vmOp = vmOp;

        if (virNetlinkEventAddClient(virNetDevMacVLanVPortProfileCallback,
                                     virNetDevMacVLanVPortProfileDestroyCallback,
704
                                     calld, macaddress, NETLINK_ROUTE) < 0)
705 706 707 708 709
            goto error;
    }

    return 0;

710
 error:
711 712 713 714
    virNetlinkCallbackDataFree(calld);
    return -1;
}

715

716
/**
717
 * virNetDevMacVLanCreateWithVPortProfile:
718 719 720 721 722 723 724 725 726 727 728 729
 * Create an instance of a macvtap device and open its tap character
 * device.
 * @tgifname: Interface name that the macvtap is supposed to have. May
 *    be NULL if this function is supposed to choose a name
 * @macaddress: The MAC address for the macvtap device
 * @linkdev: The interface name of the NIC to connect to the external bridge
 * @mode: int describing the mode for 'bridge', 'vepa', 'private' or 'passthru'.
 * @vmuuid: The UUID of the VM the macvtap belongs to
 * @virtPortProfile: pointer to object holding the virtual port profile data
 * @res_ifname: Pointer to a string pointer where the actual name of the
 *     interface will be stored into if everything succeeded. It is up
 *     to the caller to free the string.
730 731
 * @tapfd: array of file descriptor return value for the new tap device
 * @tapfdSize: number of file descriptors in @tapfd
732
 * @flags: OR of virNetDevMacVLanCreateFlags.
733
 *
734 735 736 737 738
 * Creates a macvlan device. Optionally, if flags &
 * VIR_NETDEV_MACVLAN_CREATE_WITH_TAP is set, @tapfd is populated with FDs of
 * tap devices up to @tapfdSize.
 *
 * Return 0 on success, -1 on error.
739
 */
740
int virNetDevMacVLanCreateWithVPortProfile(const char *tgifname,
741
                                           const virMacAddr *macaddress,
742
                                           const char *linkdev,
743
                                           virNetDevMacVLanMode mode,
744 745 746
                                           const unsigned char *vmuuid,
                                           virNetDevVPortProfilePtr virtPortProfile,
                                           char **res_ifname,
747
                                           virNetDevVPortProfileOp vmOp,
748
                                           char *stateDir,
749 750
                                           int *tapfd,
                                           size_t tapfdSize,
751
                                           unsigned int flags)
752
{
753 754 755 756 757 758
    const char *type = (flags & VIR_NETDEV_MACVLAN_CREATE_WITH_TAP) ?
        "macvtap" : "macvlan";
    const char *prefix = (flags & VIR_NETDEV_MACVLAN_CREATE_WITH_TAP) ?
        MACVTAP_NAME_PREFIX : MACVLAN_NAME_PREFIX;
    const char *pattern = (flags & VIR_NETDEV_MACVLAN_CREATE_WITH_TAP) ?
        MACVTAP_NAME_PATTERN : MACVLAN_NAME_PATTERN;
759 760 761 762
    int c, rc;
    char ifname[IFNAMSIZ];
    int retries, do_retry = 0;
    uint32_t macvtapMode;
763
    const char *cr_ifname = NULL;
764
    int ret;
765
    int vf = -1;
766
    bool vnet_hdr = flags & VIR_NETDEV_MACVLAN_VNET_HDR;
767 768 769 770 771 772 773 774 775 776 777 778 779 780

    macvtapMode = modeMap[mode];

    *res_ifname = NULL;

    VIR_DEBUG("%s: VM OPERATION: %s", __FUNCTION__, virNetDevVPortProfileOpTypeToString(vmOp));

    /** Note: When using PASSTHROUGH mode with MACVTAP devices the link
     * device's MAC address must be set to the VMs MAC address. In
     * order to not confuse the first switch or bridge in line this MAC
     * address must be reset when the VM is shut down.
     * This is especially important when using SRIOV capable cards that
     * emulate their switch in firmware.
     */
781

782
    if (mode == VIR_NETDEV_MACVLAN_MODE_PASSTHRU) {
783 784 785 786 787 788 789 790 791 792 793 794 795 796
        if (virtPortProfile &&
            virtPortProfile->virtPortType == VIR_NETDEV_VPORT_PROFILE_8021QBH) {
            /* The Cisco enic driver (the only card that uses
             * 802.1Qbh) doesn't support IFLA_VFINFO_LIST, which is
             * required for virNetDevReplaceNetConfig(), so we must
             * use this function (which uses ioctl(SIOCGIFHWADDR)
             * instead or virNetDevReplaceNetConfig()
             */
            if (virNetDevReplaceMacAddress(linkdev, macaddress, stateDir) < 0)
                return -1;
        } else {
            if (virNetDevReplaceNetConfig(linkdev, -1, macaddress, -1, stateDir) < 0)
                return -1;
        }
797 798 799
    }

    if (tgifname) {
800 801 802 803
        if ((ret = virNetDevExists(tgifname)) < 0)
            return -1;

        if (ret) {
804
            if (STRPREFIX(tgifname, prefix))
805
                goto create_name;
806 807
            virReportSystemError(EEXIST,
                                 _("Unable to create macvlan device %s"), tgifname);
808 809 810
            return -1;
        }
        cr_ifname = tgifname;
811 812
        rc = virNetDevMacVLanCreate(tgifname, type, macaddress, linkdev,
                                    macvtapMode, &do_retry);
813 814 815
        if (rc < 0)
            return -1;
    } else {
816
 create_name:
817
        retries = 5;
818
        virMutexLock(&virNetDevMacVLanCreateMutex);
819
        for (c = 0; c < 8192; c++) {
820
            snprintf(ifname, sizeof(ifname), pattern, c);
821 822
            if ((ret = virNetDevExists(ifname)) < 0) {
                virMutexUnlock(&virNetDevMacVLanCreateMutex);
823
                return -1;
824
            }
825
            if (!ret) {
826 827
                rc = virNetDevMacVLanCreate(ifname, type, macaddress, linkdev,
                                            macvtapMode, &do_retry);
828 829
                if (rc == 0) {
                    cr_ifname = ifname;
830
                    break;
831
                }
832 833 834

                if (do_retry && --retries)
                    continue;
835
                break;
836 837
            }
        }
838 839

        virMutexUnlock(&virNetDevMacVLanCreateMutex);
840
        if (!cr_ifname)
841
            return -1;
842 843 844 845 846 847
    }

    if (virNetDevVPortProfileAssociate(cr_ifname,
                                       virtPortProfile,
                                       macaddress,
                                       linkdev,
848
                                       vf,
849
                                       vmuuid, vmOp, false) < 0) {
850 851 852 853
        rc = -1;
        goto link_del_exit;
    }

854 855 856 857 858
    if (flags & VIR_NETDEV_MACVLAN_CREATE_IFUP) {
        if (virNetDevSetOnline(cr_ifname, true) < 0) {
            rc = -1;
            goto disassociate_exit;
        }
859 860
    }

861
    if (flags & VIR_NETDEV_MACVLAN_CREATE_WITH_TAP) {
862
        if (virNetDevMacVLanTapOpen(cr_ifname, tapfd, tapfdSize, 10) < 0)
863 864
            goto disassociate_exit;

865
        if (virNetDevMacVLanTapSetup(tapfd, tapfdSize, vnet_hdr, tapfdSize > 0) < 0) {
866 867 868
            VIR_FORCE_CLOSE(rc); /* sets rc to -1 */
            goto disassociate_exit;
        }
869
        if (VIR_STRDUP(*res_ifname, cr_ifname) < 0) {
870 871 872 873
            VIR_FORCE_CLOSE(rc); /* sets rc to -1 */
            goto disassociate_exit;
        }
    } else {
874
        if (VIR_STRDUP(*res_ifname, cr_ifname) < 0)
875 876 877
            goto disassociate_exit;
        rc = 0;
    }
878

879 880 881 882 883 884 885 886 887
    if (vmOp == VIR_NETDEV_VPORT_PROFILE_OP_CREATE ||
        vmOp == VIR_NETDEV_VPORT_PROFILE_OP_RESTORE) {
        /* Only directly register upon a create or restore (restarting
         * a saved image) - migration and libvirtd restart are handled
         * elsewhere.
         */
        if (virNetDevMacVLanVPortProfileRegisterCallback(cr_ifname, macaddress,
                                                         linkdev, vmuuid,
                                                         virtPortProfile,
888
                                                         vmOp) < 0)
889
        goto disassociate_exit;
890
    }
891 892 893

    return rc;

894
 disassociate_exit:
895 896 897 898
    ignore_value(virNetDevVPortProfileDisassociate(cr_ifname,
                                                   virtPortProfile,
                                                   macaddress,
                                                   linkdev,
899
                                                   vf,
900
                                                   vmOp));
901 902
    while (tapfdSize--)
        VIR_FORCE_CLOSE(tapfd[tapfdSize]);
903

904
 link_del_exit:
905
    ignore_value(virNetDevMacVLanDelete(cr_ifname));
906 907 908 909 910 911

    return rc;
}


/**
912
 * virNetDevMacVLanDeleteWithVPortProfile:
913 914 915 916 917 918 919 920
 * @ifname : The name of the macvtap interface
 * @linkdev: The interface name of the NIC to connect to the external bridge
 * @virtPortProfile: pointer to object holding the virtual port profile data
 *
 * Delete an interface given its name. Disassociate
 * it with the switch if port profile parameters
 * were provided.
 */
921
int virNetDevMacVLanDeleteWithVPortProfile(const char *ifname,
922
                                           const virMacAddr *macaddr,
923 924 925 926
                                           const char *linkdev,
                                           int mode,
                                           virNetDevVPortProfilePtr virtPortProfile,
                                           char *stateDir)
927 928
{
    int ret = 0;
929 930
    int vf = -1;

931 932 933 934 935 936 937
    if (mode == VIR_NETDEV_MACVLAN_MODE_PASSTHRU) {
        if (virtPortProfile &&
             virtPortProfile->virtPortType == VIR_NETDEV_VPORT_PROFILE_8021QBH)
            ignore_value(virNetDevRestoreMacAddress(linkdev, stateDir));
        else
            ignore_value(virNetDevRestoreNetConfig(linkdev, vf, stateDir));
    }
938 939 940 941 942 943

    if (ifname) {
        if (virNetDevVPortProfileDisassociate(ifname,
                                              virtPortProfile,
                                              macaddr,
                                              linkdev,
944
                                              vf,
945 946
                                              VIR_NETDEV_VPORT_PROFILE_OP_DESTROY) < 0)
            ret = -1;
947
        if (virNetDevMacVLanDelete(ifname) < 0)
948 949
            ret = -1;
    }
950

951
    virNetlinkEventRemoveClient(0, macaddr, NETLINK_ROUTE);
952

953 954 955
    return ret;
}

956 957 958 959 960 961 962 963 964 965 966 967 968 969 970
/**
 * virNetDevMacVLanRestartWithVPortProfile:
 * Register a port profile callback handler for a VM that
 * is already running
 * .
 * @cr_ifname: Interface name that the macvtap has.
 * @macaddress: The MAC address for the macvtap device
 * @linkdev: The interface name of the NIC to connect to the external bridge
 * @vmuuid: The UUID of the VM the macvtap belongs to
 * @virtPortProfile: pointer to object holding the virtual port profile data
 * @vmOp: Operation to use during setup of the association
 *
 * Returns 0; returns -1 on error.
 */
int virNetDevMacVLanRestartWithVPortProfile(const char *cr_ifname,
971
                                           const virMacAddr *macaddress,
972 973 974
                                           const char *linkdev,
                                           const unsigned char *vmuuid,
                                           virNetDevVPortProfilePtr virtPortProfile,
975
                                           virNetDevVPortProfileOp vmOp)
976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992
{
    int rc = 0;

    rc = virNetDevMacVLanVPortProfileRegisterCallback(cr_ifname, macaddress,
                                                      linkdev, vmuuid,
                                                      virtPortProfile, vmOp);
    if (rc < 0)
        goto error;

    ignore_value(virNetDevVPortProfileAssociate(cr_ifname,
                                                virtPortProfile,
                                                macaddress,
                                                linkdev,
                                                -1,
                                                vmuuid,
                                                vmOp, true));

993
 error:
994 995 996 997
    return rc;

}

998
#else /* ! WITH_MACVTAP */
999 1000
/* Stub for builds without WITH_MACVTAP: every argument is ignored, an
 * ENOSYS system error is reported and -1 is returned. */
int
virNetDevMacVLanCreate(const char *ifname ATTRIBUTE_UNUSED,
                       const char *type ATTRIBUTE_UNUSED,
                       const virMacAddr *macaddress ATTRIBUTE_UNUSED,
                       const char *srcdev ATTRIBUTE_UNUSED,
                       uint32_t macvlan_mode ATTRIBUTE_UNUSED,
                       int *retry ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Cannot create macvlan devices on this platform"));

    return -1;
}

/* Stub for builds without WITH_MACVTAP: @ifname is ignored, an ENOSYS
 * system error is reported and -1 is returned.
 * NOTE(review): the message says "create" although this is the delete
 * entry point; it matches the text used by the sibling stubs. */
int
virNetDevMacVLanDelete(const char *ifname ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Cannot create macvlan devices on this platform"));

    return -1;
}

1018
/* Stub for builds without WITH_MACVTAP: every argument is ignored, an
 * ENOSYS system error is reported and -1 is returned. */
int
virNetDevMacVLanCreateWithVPortProfile(const char *ifname ATTRIBUTE_UNUSED,
                                       const virMacAddr *macaddress ATTRIBUTE_UNUSED,
                                       const char *linkdev ATTRIBUTE_UNUSED,
                                       virNetDevMacVLanMode mode ATTRIBUTE_UNUSED,
                                       const unsigned char *vmuuid ATTRIBUTE_UNUSED,
                                       virNetDevVPortProfilePtr virtPortProfile ATTRIBUTE_UNUSED,
                                       char **res_ifname ATTRIBUTE_UNUSED,
                                       virNetDevVPortProfileOp vmop ATTRIBUTE_UNUSED,
                                       char *stateDir ATTRIBUTE_UNUSED,
                                       int *tapfd ATTRIBUTE_UNUSED,
                                       size_t tapfdSize ATTRIBUTE_UNUSED,
                                       unsigned int unused_flags ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Cannot create macvlan devices on this platform"));

    return -1;
}

1036
/* Stub for builds without WITH_MACVTAP: every argument is ignored, an
 * ENOSYS system error is reported and -1 is returned. */
int
virNetDevMacVLanDeleteWithVPortProfile(const char *ifname ATTRIBUTE_UNUSED,
                                       const virMacAddr *macaddress ATTRIBUTE_UNUSED,
                                       const char *linkdev ATTRIBUTE_UNUSED,
                                       int mode ATTRIBUTE_UNUSED,
                                       virNetDevVPortProfilePtr virtPortProfile ATTRIBUTE_UNUSED,
                                       char *stateDir ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Cannot create macvlan devices on this platform"));

    return -1;
}
1047 1048

/* Stub for builds without WITH_MACVTAP: every argument is ignored, an
 * ENOSYS system error is reported and -1 is returned. */
int
virNetDevMacVLanRestartWithVPortProfile(const char *cr_ifname ATTRIBUTE_UNUSED,
                                        const virMacAddr *macaddress ATTRIBUTE_UNUSED,
                                        const char *linkdev ATTRIBUTE_UNUSED,
                                        const unsigned char *vmuuid ATTRIBUTE_UNUSED,
                                        virNetDevVPortProfilePtr virtPortProfile ATTRIBUTE_UNUSED,
                                        virNetDevVPortProfileOp vmOp ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Cannot create macvlan devices on this platform"));

    return -1;
}
1059 1060

/* Stub for builds without WITH_MACVTAP: every argument is ignored, an
 * ENOSYS system error is reported and -1 is returned. */
int
virNetDevMacVLanVPortProfileRegisterCallback(const char *ifname ATTRIBUTE_UNUSED,
                                             const virMacAddr *macaddress ATTRIBUTE_UNUSED,
                                             const char *linkdev ATTRIBUTE_UNUSED,
                                             const unsigned char *vmuuid ATTRIBUTE_UNUSED,
                                             virNetDevVPortProfilePtr virtPortProfile ATTRIBUTE_UNUSED,
                                             virNetDevVPortProfileOp vmOp ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Cannot create macvlan devices on this platform"));

    return -1;
}
1071
#endif /* ! WITH_MACVTAP */