virnetdevmacvlan.c 35.5 KB
Newer Older
1
/*
2
 * Copyright (C) 2010-2015 Red Hat, Inc.
3
 * Copyright (C) 2010-2012 IBM Corporation
4 5 6 7 8 9 10 11 12 13 14 15
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library.  If not, see
 * <http://www.gnu.org/licenses/>.
18 19 20 21 22 23 24 25 26 27 28 29 30
 *
 * Authors:
 *     Stefan Berger <stefanb@us.ibm.com>
 *
 * Notes:
 * netlink: http://lovezutto.googlepages.com/netlink.pdf
 *          iproute2 package
 *
 */

#include <config.h>

#include "virnetdevmacvlan.h"
31
#include "virmacaddr.h"
32
#include "virerror.h"
33
#include "virthread.h"
34
#include "virstring.h"
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51

#define VIR_FROM_THIS VIR_FROM_NET

/* String names of the macvlan modes.
 * NOTE(review): presumably the order has to match the declaration order
 * of the virNetDevMacVLanMode enum in the header - confirm before
 * reordering or adding entries. */
VIR_ENUM_IMPL(virNetDevMacVLanMode, VIR_NETDEV_MACVLAN_MODE_LAST,
              "vepa",
              "private",
              "bridge",
              "passthrough")

#if WITH_MACVTAP
# include <stdint.h>
# include <stdio.h>
# include <errno.h>
# include <fcntl.h>
# include <sys/socket.h>
# include <sys/ioctl.h>

52
# include <net/if.h>
53 54 55 56 57 58 59
# include <linux/if_tun.h>

/* Older kernels lacked this enum value.  */
# if !HAVE_DECL_MACVLAN_MODE_PASSTHRU
#  define MACVLAN_MODE_PASSTHRU 8
# endif

60
# include "viralloc.h"
61
# include "virlog.h"
62
# include "viruuid.h"
63
# include "virfile.h"
64
# include "virnetlink.h"
65
# include "virnetdev.h"
66 67
# include "virpidfile.h"

68
VIR_LOG_INIT("util.netdevmacvlan");
69 70 71 72

# define MACVTAP_NAME_PREFIX	"macvtap"
# define MACVTAP_NAME_PATTERN	"macvtap%d"

73 74 75
# define MACVLAN_NAME_PREFIX	"macvlan"
# define MACVLAN_NAME_PATTERN	"macvlan%d"

76
/* Held by virNetDevMacVLanCreateWithVPortProfile() while it searches for a
 * free "macvtap%d"/"macvlan%d" name and creates the device under it. */
virMutex virNetDevMacVLanCreateMutex = VIR_MUTEX_INITIALIZER;
77

78 79 80 81
/**
 * virNetDevMacVLanCreate:
 *
 * @ifname: The name the interface is supposed to have; optional parameter
82
 * @type: The type of device, i.e., "macvtap", "macvlan"
83 84 85 86 87 88 89 90 91 92 93 94 95 96
 * @macaddress: The MAC address of the device
 * @srcdev: The name of the 'link' device
 * @macvlan_mode: The macvlan mode to use
 * @retry: Pointer to integer that will be '1' upon return if an interface
 *         with the same name already exists and it is worth to try
 *         again with a different name
 *
 * Create a macvtap device with the given properties.
 *
 * Returns 0 on success, -1 on fatal error.
 */
int
virNetDevMacVLanCreate(const char *ifname,
                       const char *type,
97
                       const virMacAddr *macaddress,
98 99 100 101
                       const char *srcdev,
                       uint32_t macvlan_mode,
                       int *retry)
{
102
    int rc = -1;
103
    struct nlmsghdr *resp = NULL;
104 105 106 107 108 109
    struct nlmsgerr *err;
    struct ifinfomsg ifinfo = { .ifi_family = AF_UNSPEC };
    int ifindex;
    unsigned int recvbuflen;
    struct nl_msg *nl_msg;
    struct nlattr *linkinfo, *info_data;
110
    char macstr[VIR_MAC_STRING_BUFLEN];
111

112
    if (virNetDevGetIndex(srcdev, &ifindex) < 0)
113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
        return -1;

    *retry = 0;

    nl_msg = nlmsg_alloc_simple(RTM_NEWLINK,
                                NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL);
    if (!nl_msg) {
        virReportOOMError();
        return -1;
    }

    if (nlmsg_append(nl_msg,  &ifinfo, sizeof(ifinfo), NLMSG_ALIGNTO) < 0)
        goto buffer_too_small;

    if (nla_put_u32(nl_msg, IFLA_LINK, ifindex) < 0)
        goto buffer_too_small;

    if (nla_put(nl_msg, IFLA_ADDRESS, VIR_MAC_BUFLEN, macaddress) < 0)
        goto buffer_too_small;

    if (ifname &&
        nla_put(nl_msg, IFLA_IFNAME, strlen(ifname)+1, ifname) < 0)
        goto buffer_too_small;

    if (!(linkinfo = nla_nest_start(nl_msg, IFLA_LINKINFO)))
        goto buffer_too_small;

    if (nla_put(nl_msg, IFLA_INFO_KIND, strlen(type), type) < 0)
        goto buffer_too_small;

    if (macvlan_mode > 0) {
        if (!(info_data = nla_nest_start(nl_msg, IFLA_INFO_DATA)))
            goto buffer_too_small;

        if (nla_put(nl_msg, IFLA_MACVLAN_MODE, sizeof(macvlan_mode),
                    &macvlan_mode) < 0)
            goto buffer_too_small;

        nla_nest_end(nl_msg, info_data);
    }

    nla_nest_end(nl_msg, linkinfo);

156
    if (virNetlinkCommand(nl_msg, &resp, &recvbuflen, 0, 0,
157
                          NETLINK_ROUTE, 0) < 0) {
158 159 160
        goto cleanup;
    }

161
    if (recvbuflen < NLMSG_LENGTH(0) || resp == NULL)
162 163 164 165 166 167 168 169 170 171 172 173 174 175 176
        goto malformed_resp;

    switch (resp->nlmsg_type) {
    case NLMSG_ERROR:
        err = (struct nlmsgerr *)NLMSG_DATA(resp);
        if (resp->nlmsg_len < NLMSG_LENGTH(sizeof(*err)))
            goto malformed_resp;

        switch (err->error) {

        case 0:
            break;

        case -EEXIST:
            *retry = 1;
177
            goto cleanup;
178 179 180

        default:
            virReportSystemError(-err->error,
181 182 183
                                 _("error creating %s interface %s@%s (%s)"),
                                 type, ifname, srcdev,
                                 virMacAddrFormat(macaddress, macstr));
184
            goto cleanup;
185 186 187 188 189 190 191 192 193 194
        }
        break;

    case NLMSG_DONE:
        break;

    default:
        goto malformed_resp;
    }

195
    rc = 0;
196
 cleanup:
197
    nlmsg_free(nl_msg);
198
    VIR_FREE(resp);
199 200
    return rc;

201
 malformed_resp:
202
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
203
                   _("malformed netlink response message"));
204
    goto cleanup;
205

206
 buffer_too_small:
207
    virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
208
                   _("allocated netlink buffer is too small"));
209
    goto cleanup;
210 211 212 213 214 215 216 217 218 219 220 221 222
}

/**
 * virNetDevMacVLanDelete:
 *
 * @ifname: Name of the interface
 *
 * Tear down the interface with the given name; the work is delegated
 * to virNetlinkDelLink().
 *
 * Returns 0 on success, -1 on fatal error.
 */
int
virNetDevMacVLanDelete(const char *ifname)
{
    int status = virNetlinkDelLink(ifname);

    return status;
}


227 228 229 230 231 232 233 234 235 236 237 238
/**
 * virNetDevMacVLanTapOpen:
 * Open the macvtap's tap device.
 * @ifname: Name of the macvtap interface
 * @retries : Number of retries in case udev for example may need to be
 *            waited for to create the tap chardev
 * Returns negative value in case of error, the file descriptor otherwise.
 */
static
int virNetDevMacVLanTapOpen(const char *ifname,
                            int retries)
{
239 240 241
    int ret = -1;
    FILE *file = NULL;
    char *path;
242 243 244 245
    int ifindex;
    char tapname[50];
    int tapfd;

246
    if (virNetDevSysfsFile(&path, ifname, "ifindex") < 0)
247 248 249 250 251 252 253 254
        return -1;

    file = fopen(path, "r");

    if (!file) {
        virReportSystemError(errno,
                             _("cannot open macvtap file %s to determine "
                               "interface index"), path);
255
        goto cleanup;
256 257 258 259
    }

    if (fscanf(file, "%d", &ifindex) != 1) {
        virReportSystemError(errno,
E
Eric Blake 已提交
260
                             "%s", _("cannot determine macvtap's tap device "
261
                             "interface index"));
262
        goto cleanup;
263 264 265 266 267 268 269 270 271
    }

    VIR_FORCE_FCLOSE(file);

    if (snprintf(tapname, sizeof(tapname),
                 "/dev/tap%d", ifindex) >= sizeof(tapname)) {
        virReportSystemError(errno,
                             "%s",
                             _("internal buffer for tap device is too small"));
272
        goto cleanup;
273 274 275 276 277 278 279 280 281 282 283 284 285
    }

    while (1) {
        /* may need to wait for udev to be done */
        tapfd = open(tapname, O_RDWR);
        if (tapfd < 0 && retries > 0) {
            retries--;
            usleep(20000);
            continue;
        }
        break;
    }

286
    if (tapfd < 0) {
287 288 289
        virReportSystemError(errno,
                             _("cannot open macvtap tap device %s"),
                             tapname);
290 291 292 293 294 295 296
        goto cleanup;
    }
    ret = tapfd;
 cleanup:
    VIR_FREE(path);
    VIR_FORCE_FCLOSE(file);
    return ret;
297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367
}


/**
 * virNetDevMacVLanTapSetup:
 * @tapfd: file descriptor of the macvtap tap
 * @vnet_hdr: non-zero to have IFF_VNET_HDR switched on, 0 to have it off
 *
 * Bring the IFF_VNET_HDR flag of the tap into the requested state. The
 * flag is only switched on when TUNGETFEATURES advertises it.
 *
 * Returns 0 when the flag already was in the requested state, when it was
 * changed, when the feature is not advertised, or when switching it on
 * failed (that failure is reported but not treated as fatal). Returns -1
 * when the interface flags or feature flags cannot be read, or when the
 * flag is set and cannot be cleared; in that case the macvtap device
 * should not be used.
 */
static int
virNetDevMacVLanTapSetup(int tapfd, int vnet_hdr)
{
    struct ifreq ifreq;
    unsigned int features;
    bool enabled;

    memset(&ifreq, 0, sizeof(ifreq));

    if (ioctl(tapfd, TUNGETIFF, &ifreq) < 0) {
        virReportSystemError(errno, "%s",
                             _("cannot get interface flags on macvtap tap"));
        return -1;
    }

    enabled = (ifreq.ifr_flags & IFF_VNET_HDR) != 0;

    /* Flag already matches the request: nothing to write back */
    if (enabled == (vnet_hdr != 0))
        return 0;

    if (enabled) {
        /* Flag is on but has to go away; not being able to do so is fatal */
        ifreq.ifr_flags &= ~IFF_VNET_HDR;
        if (ioctl(tapfd, TUNSETIFF, &ifreq) < 0) {
            virReportSystemError(errno, "%s",
                                 _("cannot clean IFF_VNET_HDR flag on macvtap tap"));
            return -1;
        }
        return 0;
    }

    /* Flag is off but requested: only try if the device offers it */
    if (ioctl(tapfd, TUNGETFEATURES, &features) < 0) {
        virReportSystemError(errno, "%s",
               _("cannot get feature flags on macvtap tap"));
        return -1;
    }

    if (!(features & IFF_VNET_HDR))
        return 0;

    ifreq.ifr_flags |= IFF_VNET_HDR;
    if (ioctl(tapfd, TUNSETIFF, &ifreq) < 0) {
        /* Reported, but the caller still gets 0 for this case */
        virReportSystemError(errno, "%s",
                             _("cannot set IFF_VNET_HDR flag on macvtap tap"));
    }

    return 0;
}


/* Lookup table indexed by virNetDevMacVLanMode that yields the matching
 * kernel MACVLAN_MODE_* constant; virNetDevMacVLanCreateWithVPortProfile()
 * uses it to obtain the mode value handed to virNetDevMacVLanCreate(). */
static const uint32_t modeMap[VIR_NETDEV_MACVLAN_MODE_LAST] = {
    [VIR_NETDEV_MACVLAN_MODE_VEPA] = MACVLAN_MODE_VEPA,
    [VIR_NETDEV_MACVLAN_MODE_PRIVATE] = MACVLAN_MODE_PRIVATE,
    [VIR_NETDEV_MACVLAN_MODE_BRIDGE] = MACVLAN_MODE_BRIDGE,
    [VIR_NETDEV_MACVLAN_MODE_PASSTHRU] = MACVLAN_MODE_PASSTHRU,
};

368 369 370 371
/* Struct to hold the state and configuration of a 802.1qbg port */
struct virNetlinkCallbackData {
    char *cr_ifname;
    virNetDevVPortProfilePtr virtPortProfile;
372
    virMacAddr macaddress;
373
    char *linkdev;
374
    int vf;
S
Stefan Berger 已提交
375
    unsigned char vmuuid[VIR_UUID_BUFLEN];
376
    virNetDevVPortProfileOp vmOp;
377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402
    unsigned int linkState;
};

typedef struct virNetlinkCallbackData *virNetlinkCallbackDataPtr;

# define INSTANCE_STRLEN 36

/* Format the 16 bytes at @p as lowercase hex in the 8-4-4-4-12 grouping
 * ("xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx") into @dst.
 *
 * @size is the capacity of @dst and has to exceed INSTANCE_STRLEN so the
 * terminating NUL fits as well.
 *
 * Returns 0 on success; -1, leaving @dst untouched, when @dst is NULL or
 * @size is too small. */
static int instance2str(const unsigned char *p, char *dst, size_t size)
{
    size_t byte;
    size_t pos = 0;

    if (!dst || size <= INSTANCE_STRLEN)
        return -1;

    for (byte = 0; byte < 16; byte++) {
        /* a dash precedes the 5th, 7th, 9th and 11th byte */
        if (byte == 4 || byte == 6 || byte == 8 || byte == 10)
            dst[pos++] = '-';

        /* two hex digits per byte; snprintf keeps the string terminated */
        snprintf(dst + pos, size - pos, "%02x", p[byte]);
        pos += 2;
    }

    return 0;
}

# define LLDPAD_PID_FILE  "/var/run/lldpad.pid"
# define VIRIP_PID_FILE   "/var/run/virip.pid"

/**
 * virNetDevMacVLanVPortProfileCallback:
 *
403
 * @hdr: The buffer containing the received netlink header + payload
404 405 406 407 408 409 410 411 412 413 414
 * @length: The length of the received netlink message.
 * @peer: The netling sockaddr containing the peer information
 * @handled: Contains information if the message has been replied to yet
 * @opaque: Contains vital information regarding the associated vm an interface
 *
 * This function is called when a netlink message is received. The function
 * reads the message and responds if it is pertinent to the running VMs
 * network interface.
 */

static void
415 416
virNetDevMacVLanVPortProfileCallback(struct nlmsghdr *hdr,
                                     unsigned int length,
417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442
                                     struct sockaddr_nl *peer,
                                     bool *handled,
                                     void *opaque)
{
   struct nla_policy ifla_vf_policy[IFLA_VF_MAX + 1] = {
       [IFLA_VF_MAC] = {.minlen = sizeof(struct ifla_vf_mac),
                        .maxlen = sizeof(struct ifla_vf_mac)},
       [IFLA_VF_VLAN] = {.minlen = sizeof(struct ifla_vf_vlan),
                         .maxlen = sizeof(struct ifla_vf_vlan)},
    };

    struct nla_policy ifla_port_policy[IFLA_PORT_MAX + 1] = {
        [IFLA_PORT_RESPONSE] = {.type = NLA_U16},
    };

    struct nlattr *tb[IFLA_MAX + 1], *tb3[IFLA_PORT_MAX + 1],
        *tb_vfinfo[IFLA_VF_MAX + 1], *tb_vfinfo_list;

    struct ifinfomsg ifinfo;
    void *data;
    int rem;
    char *ifname;
    bool indicate = false;
    virNetlinkCallbackDataPtr calld = opaque;
    pid_t lldpad_pid = 0;
    pid_t virip_pid = 0;
443
    char macaddr[VIR_MAC_STRING_BUFLEN];
444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490

    data = nlmsg_data(hdr);

    /* Quickly decide if we want this or not */

    if (virPidFileReadPath(LLDPAD_PID_FILE, &lldpad_pid) < 0)
        return;

    ignore_value(virPidFileReadPath(VIRIP_PID_FILE, &virip_pid));

    if (hdr->nlmsg_pid != lldpad_pid && hdr->nlmsg_pid != virip_pid)
        return; /* we only care for lldpad and virip messages */
    if (hdr->nlmsg_type != RTM_SETLINK)
        return; /* we only care for RTM_SETLINK */
    if (*handled)
        return; /* if it has been handled - dont handle again */

    /* DEBUG start */
    VIR_INFO("netlink message nl_sockaddr: %p len: %d", peer, length);
    VIR_DEBUG("nlmsg_type  = 0x%02x", hdr->nlmsg_type);
    VIR_DEBUG("nlmsg_len   = 0x%04x", hdr->nlmsg_len);
    VIR_DEBUG("nlmsg_pid   = %d", hdr->nlmsg_pid);
    VIR_DEBUG("nlmsg_seq   = 0x%08x", hdr->nlmsg_seq);
    VIR_DEBUG("nlmsg_flags = 0x%04x", hdr->nlmsg_flags);

    VIR_DEBUG("lldpad pid  = %d", lldpad_pid);

    switch (hdr->nlmsg_type) {
    case RTM_NEWLINK:
    case RTM_DELLINK:
    case RTM_SETLINK:
    case RTM_GETLINK:
        VIR_DEBUG(" IFINFOMSG\n");
        VIR_DEBUG("        ifi_family = 0x%02x\n",
            ((struct ifinfomsg *)data)->ifi_family);
        VIR_DEBUG("        ifi_type   = 0x%x\n",
            ((struct ifinfomsg *)data)->ifi_type);
        VIR_DEBUG("        ifi_index  = %i\n",
            ((struct ifinfomsg *)data)->ifi_index);
        VIR_DEBUG("        ifi_flags  = 0x%04x\n",
            ((struct ifinfomsg *)data)->ifi_flags);
        VIR_DEBUG("        ifi_change = 0x%04x\n",
            ((struct ifinfomsg *)data)->ifi_change);
    }
    /* DEBUG end */

    /* Parse netlink message assume a setlink with vfports */
491
    memcpy(&ifinfo, NLMSG_DATA(hdr), sizeof(ifinfo));
492 493 494
    VIR_DEBUG("family:%#x type:%#x index:%d flags:%#x change:%#x",
        ifinfo.ifi_family, ifinfo.ifi_type, ifinfo.ifi_index,
        ifinfo.ifi_flags, ifinfo.ifi_change);
495
    if (nlmsg_parse(hdr, sizeof(ifinfo),
496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524
        (struct nlattr **)&tb, IFLA_MAX, NULL)) {
        VIR_DEBUG("error parsing request...");
        return;
    }

    if (tb[IFLA_VFINFO_LIST]) {
        VIR_DEBUG("FOUND IFLA_VFINFO_LIST!");

        nla_for_each_nested(tb_vfinfo_list, tb[IFLA_VFINFO_LIST], rem) {
            if (nla_type(tb_vfinfo_list) != IFLA_VF_INFO) {
                VIR_DEBUG("nested parsing of"
                    "IFLA_VFINFO_LIST failed.");
                return;
            }
            if (nla_parse_nested(tb_vfinfo, IFLA_VF_MAX,
                tb_vfinfo_list, ifla_vf_policy)) {
                VIR_DEBUG("nested parsing of "
                    "IFLA_VF_INFO failed.");
                return;
            }
        }

        if (tb_vfinfo[IFLA_VF_MAC]) {
            struct ifla_vf_mac *mac = RTA_DATA(tb_vfinfo[IFLA_VF_MAC]);
            unsigned char *m = mac->mac;

            VIR_DEBUG("IFLA_VF_MAC = %2x:%2x:%2x:%2x:%2x:%2x",
                      m[0], m[1], m[2], m[3], m[4], m[5]);

E
Eric Blake 已提交
525
            if (virMacAddrCmpRaw(&calld->macaddress, mac->mac)) {
526
                /* Repeat the same check for a broadcast mac */
527
                size_t i;
528

529 530
                for (i = 0; i < VIR_MAC_BUFLEN; i++) {
                    if (calld->macaddress.addr[i] != 0xff) {
531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623
                        VIR_DEBUG("MAC address match failed (wasn't broadcast)");
                        return;
                    }
                }
            }
        }

        if (tb_vfinfo[IFLA_VF_VLAN]) {
            struct ifla_vf_vlan *vlan = RTA_DATA(tb_vfinfo[IFLA_VF_VLAN]);

            VIR_DEBUG("IFLA_VF_VLAN = %d", vlan->vlan);
        }
    }

    if (tb[IFLA_IFNAME]) {
        ifname = (char *)RTA_DATA(tb[IFLA_IFNAME]);
        VIR_DEBUG("IFLA_IFNAME = %s\n", ifname);
    }

    if (tb[IFLA_OPERSTATE]) {
        rem = *(unsigned short *)RTA_DATA(tb[IFLA_OPERSTATE]);
        VIR_DEBUG("IFLA_OPERSTATE = %d\n", rem);
    }

    if (tb[IFLA_VF_PORTS]) {
        struct nlattr *tb_vf_ports;

        VIR_DEBUG("found IFLA_VF_PORTS\n");
        nla_for_each_nested(tb_vf_ports, tb[IFLA_VF_PORTS], rem) {

            VIR_DEBUG("iterating\n");
            if (nla_type(tb_vf_ports) != IFLA_VF_PORT) {
                VIR_DEBUG("not a IFLA_VF_PORT. skipping\n");
                continue;
            }
            if (nla_parse_nested(tb3, IFLA_PORT_MAX, tb_vf_ports,
                ifla_port_policy)) {
                VIR_DEBUG("nested parsing on level 2"
                          " failed.");
            }
            if (tb3[IFLA_PORT_VF]) {
                VIR_DEBUG("IFLA_PORT_VF = %d",
                          *(uint32_t *) (RTA_DATA(tb3[IFLA_PORT_VF])));
            }
            if (tb3[IFLA_PORT_PROFILE]) {
                VIR_DEBUG("IFLA_PORT_PROFILE = %s",
                          (char *) RTA_DATA(tb3[IFLA_PORT_PROFILE]));
            }

            if (tb3[IFLA_PORT_VSI_TYPE]) {
                struct ifla_port_vsi *pvsi;
                int tid = 0;

                pvsi = (struct ifla_port_vsi *)
                    RTA_DATA(tb3[IFLA_PORT_VSI_TYPE]);
                tid = ((pvsi->vsi_type_id[2] << 16) |
                       (pvsi->vsi_type_id[1] << 8) |
                       pvsi->vsi_type_id[0]);

                VIR_DEBUG("mgr_id: %d", pvsi->vsi_mgr_id);
                VIR_DEBUG("type_id: %d", tid);
                VIR_DEBUG("type_version: %d",
                          pvsi->vsi_type_version);
            }

            if (tb3[IFLA_PORT_INSTANCE_UUID]) {
                char instance[INSTANCE_STRLEN + 2];
                unsigned char *uuid;

                uuid = (unsigned char *)
                    RTA_DATA(tb3[IFLA_PORT_INSTANCE_UUID]);
                instance2str(uuid, instance, sizeof(instance));
                VIR_DEBUG("IFLA_PORT_INSTANCE_UUID = %s\n",
                          instance);
            }

            if (tb3[IFLA_PORT_REQUEST]) {
                uint8_t req = *(uint8_t *) RTA_DATA(tb3[IFLA_PORT_REQUEST]);
                VIR_DEBUG("IFLA_PORT_REQUEST = %d", req);

                if (req == PORT_REQUEST_DISASSOCIATE) {
                    VIR_DEBUG("Set dissaccociated.");
                    indicate = true;
                }
            }

            if (tb3[IFLA_PORT_RESPONSE]) {
                VIR_DEBUG("IFLA_PORT_RESPONSE = %d\n", *(uint16_t *)
                    RTA_DATA(tb3[IFLA_PORT_RESPONSE]));
            }
        }
    }

624
    if (!indicate)
625 626 627 628 629
        return;

    VIR_INFO("Re-send 802.1qbg associate request:");
    VIR_INFO("  if: %s", calld->cr_ifname);
    VIR_INFO("  lf: %s", calld->linkdev);
630
    VIR_INFO(" mac: %s", virMacAddrFormat(&calld->macaddress, macaddr));
631 632
    ignore_value(virNetDevVPortProfileAssociate(calld->cr_ifname,
                                                calld->virtPortProfile,
633
                                                &calld->macaddress,
634
                                                calld->linkdev,
635
                                                calld->vf,
636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675
                                                calld->vmuuid,
                                                calld->vmOp, true));
    *handled = true;
    return;
}

/**
 * virNetlinkCallbackDataFree
 *
 * @calld: pointer to a virNetlinkCallbackData object to free
 *
 * Release the interface name, the port profile copy and the link device
 * name held by @calld, then @calld itself. A NULL @calld is accepted and
 * nothing happens in that case.
 *
 * Returns nothing.
 */
static void
virNetlinkCallbackDataFree(virNetlinkCallbackDataPtr calld)
{
    if (!calld)
        return;

    /* members first, the container last */
    VIR_FREE(calld->linkdev);
    VIR_FREE(calld->virtPortProfile);
    VIR_FREE(calld->cr_ifname);
    VIR_FREE(calld);
}

/**
 * virNetDevMacVLanVPortProfileDestroyCallback:
 *
 * @watch: watch whose handle to remove (unused)
 * @macaddr: macaddr whose handle to remove (unused)
 * @opaque: Contains vital information regarding the associated vm
 *
 * Invoked when a netlink message handler is terminated. Hands @opaque,
 * which is a virNetlinkCallbackDataPtr, to virNetlinkCallbackDataFree()
 * so that the object and everything it references is released.
 */
static void
virNetDevMacVLanVPortProfileDestroyCallback(int watch ATTRIBUTE_UNUSED,
                                            const virMacAddr *macaddr ATTRIBUTE_UNUSED,
                                            void *opaque)
{
    virNetlinkCallbackDataPtr calld = opaque;

    virNetlinkCallbackDataFree(calld);
}

682
int
683
virNetDevMacVLanVPortProfileRegisterCallback(const char *ifname,
684
                                             const virMacAddr *macaddress,
685 686 687
                                             const char *linkdev,
                                             const unsigned char *vmuuid,
                                             virNetDevVPortProfilePtr virtPortProfile,
688
                                             virNetDevVPortProfileOp vmOp)
689 690 691
{
    virNetlinkCallbackDataPtr calld = NULL;

692
    if (virtPortProfile && virNetlinkEventServiceIsRunning(NETLINK_ROUTE)) {
693
        if (VIR_ALLOC(calld) < 0)
694
            goto error;
695 696
        if (VIR_STRDUP(calld->cr_ifname, ifname) < 0)
            goto error;
697
        if (VIR_ALLOC(calld->virtPortProfile) < 0)
698
            goto error;
699
        memcpy(calld->virtPortProfile, virtPortProfile, sizeof(*virtPortProfile));
700
        virMacAddrSet(&calld->macaddress, macaddress);
701 702
        if (VIR_STRDUP(calld->linkdev, linkdev) < 0)
            goto error;
S
Stefan Berger 已提交
703
        memcpy(calld->vmuuid, vmuuid, sizeof(calld->vmuuid));
704 705 706 707 708

        calld->vmOp = vmOp;

        if (virNetlinkEventAddClient(virNetDevMacVLanVPortProfileCallback,
                                     virNetDevMacVLanVPortProfileDestroyCallback,
709
                                     calld, macaddress, NETLINK_ROUTE) < 0)
710 711 712 713 714
            goto error;
    }

    return 0;

715
 error:
716 717 718 719
    virNetlinkCallbackDataFree(calld);
    return -1;
}

720

721
/**
722
 * virNetDevMacVLanCreateWithVPortProfile:
723 724 725 726 727 728 729 730 731 732 733 734 735
 * Create an instance of a macvtap device and open its tap character
 * device.
 * @tgifname: Interface name that the macvtap is supposed to have. May
 *    be NULL if this function is supposed to choose a name
 * @macaddress: The MAC address for the macvtap device
 * @linkdev: The interface name of the NIC to connect to the external bridge
 * @mode: int describing the mode for 'bridge', 'vepa', 'private' or 'passthru'.
 * @vnet_hdr: 1 to enable IFF_VNET_HDR, 0 to disable it
 * @vmuuid: The UUID of the VM the macvtap belongs to
 * @virtPortProfile: pointer to object holding the virtual port profile data
 * @res_ifname: Pointer to a string pointer where the actual name of the
 *     interface will be stored into if everything succeeded. It is up
 *     to the caller to free the string.
736
 * @flags: OR of virNetDevMacVLanCreateFlags.
737
 *
738 739 740
 * Returns file descriptor of the tap device in case of success with
 * @flags & VIR_NETDEV_MACVLAN_CREATE_WITH_TAP, otherwise returns 0; returns
 * -1 on error.
741
 */
742
int virNetDevMacVLanCreateWithVPortProfile(const char *tgifname,
743
                                           const virMacAddr *macaddress,
744
                                           const char *linkdev,
745
                                           virNetDevMacVLanMode mode,
746 747 748 749
                                           int vnet_hdr,
                                           const unsigned char *vmuuid,
                                           virNetDevVPortProfilePtr virtPortProfile,
                                           char **res_ifname,
750
                                           virNetDevVPortProfileOp vmOp,
751
                                           char *stateDir,
752
                                           unsigned int flags)
753
{
754 755 756 757 758 759
    const char *type = (flags & VIR_NETDEV_MACVLAN_CREATE_WITH_TAP) ?
        "macvtap" : "macvlan";
    const char *prefix = (flags & VIR_NETDEV_MACVLAN_CREATE_WITH_TAP) ?
        MACVTAP_NAME_PREFIX : MACVLAN_NAME_PREFIX;
    const char *pattern = (flags & VIR_NETDEV_MACVLAN_CREATE_WITH_TAP) ?
        MACVTAP_NAME_PATTERN : MACVLAN_NAME_PATTERN;
760 761 762 763
    int c, rc;
    char ifname[IFNAMSIZ];
    int retries, do_retry = 0;
    uint32_t macvtapMode;
764
    const char *cr_ifname = NULL;
765
    int ret;
766
    int vf = -1;
767 768 769 770 771 772 773 774 775 776 777 778 779 780 781

    macvtapMode = modeMap[mode];

    *res_ifname = NULL;

    VIR_DEBUG("%s: VM OPERATION: %s", __FUNCTION__, virNetDevVPortProfileOpTypeToString(vmOp));

    /** Note: When using PASSTHROUGH mode with MACVTAP devices the link
     * device's MAC address must be set to the VMs MAC address. In
     * order to not confuse the first switch or bridge in line this MAC
     * address must be reset when the VM is shut down.
     * This is especially important when using SRIOV capable cards that
     * emulate their switch in firmware.
     */
    if (mode == VIR_NETDEV_MACVLAN_MODE_PASSTHRU) {
782
        if (virNetDevReplaceNetConfig(linkdev, -1, macaddress, -1, stateDir) < 0)
783 784 785 786
            return -1;
    }

    if (tgifname) {
787 788 789 790
        if ((ret = virNetDevExists(tgifname)) < 0)
            return -1;

        if (ret) {
791
            if (STRPREFIX(tgifname, prefix))
792
                goto create_name;
793 794
            virReportSystemError(EEXIST,
                                 _("Unable to create macvlan device %s"), tgifname);
795 796 797
            return -1;
        }
        cr_ifname = tgifname;
798 799
        rc = virNetDevMacVLanCreate(tgifname, type, macaddress, linkdev,
                                    macvtapMode, &do_retry);
800 801 802
        if (rc < 0)
            return -1;
    } else {
803
 create_name:
804
        retries = 5;
805
        virMutexLock(&virNetDevMacVLanCreateMutex);
806
        for (c = 0; c < 8192; c++) {
807
            snprintf(ifname, sizeof(ifname), pattern, c);
808 809
            if ((ret = virNetDevExists(ifname)) < 0) {
                virMutexUnlock(&virNetDevMacVLanCreateMutex);
810
                return -1;
811
            }
812
            if (!ret) {
813 814
                rc = virNetDevMacVLanCreate(ifname, type, macaddress, linkdev,
                                            macvtapMode, &do_retry);
815 816
                if (rc == 0) {
                    cr_ifname = ifname;
817
                    break;
818
                }
819 820 821

                if (do_retry && --retries)
                    continue;
822
                break;
823 824
            }
        }
825 826

        virMutexUnlock(&virNetDevMacVLanCreateMutex);
827
        if (!cr_ifname)
828
            return -1;
829 830 831 832 833 834
    }

    if (virNetDevVPortProfileAssociate(cr_ifname,
                                       virtPortProfile,
                                       macaddress,
                                       linkdev,
835
                                       vf,
836
                                       vmuuid, vmOp, false) < 0) {
837 838 839 840
        rc = -1;
        goto link_del_exit;
    }

841 842 843 844 845
    if (flags & VIR_NETDEV_MACVLAN_CREATE_IFUP) {
        if (virNetDevSetOnline(cr_ifname, true) < 0) {
            rc = -1;
            goto disassociate_exit;
        }
846 847
    }

848
    if (flags & VIR_NETDEV_MACVLAN_CREATE_WITH_TAP) {
849 850 851
        if ((rc = virNetDevMacVLanTapOpen(cr_ifname, 10)) < 0)
            goto disassociate_exit;

852 853 854 855
        if (virNetDevMacVLanTapSetup(rc, vnet_hdr) < 0) {
            VIR_FORCE_CLOSE(rc); /* sets rc to -1 */
            goto disassociate_exit;
        }
856
        if (VIR_STRDUP(*res_ifname, cr_ifname) < 0) {
857 858 859 860
            VIR_FORCE_CLOSE(rc); /* sets rc to -1 */
            goto disassociate_exit;
        }
    } else {
861
        if (VIR_STRDUP(*res_ifname, cr_ifname) < 0)
862 863 864
            goto disassociate_exit;
        rc = 0;
    }
865

866 867 868 869 870 871 872 873 874
    if (vmOp == VIR_NETDEV_VPORT_PROFILE_OP_CREATE ||
        vmOp == VIR_NETDEV_VPORT_PROFILE_OP_RESTORE) {
        /* Only directly register upon a create or restore (restarting
         * a saved image) - migration and libvirtd restart are handled
         * elsewhere.
         */
        if (virNetDevMacVLanVPortProfileRegisterCallback(cr_ifname, macaddress,
                                                         linkdev, vmuuid,
                                                         virtPortProfile,
875
                                                         vmOp) < 0)
876
        goto disassociate_exit;
877
    }
878 879 880

    return rc;

881
 disassociate_exit:
882 883 884 885
    ignore_value(virNetDevVPortProfileDisassociate(cr_ifname,
                                                   virtPortProfile,
                                                   macaddress,
                                                   linkdev,
886
                                                   vf,
887 888
                                                   vmOp));

889
 link_del_exit:
890
    ignore_value(virNetDevMacVLanDelete(cr_ifname));
891 892 893 894 895 896

    return rc;
}


/**
897
 * virNetDevMacVLanDeleteWithVPortProfile:
898 899 900 901 902 903 904 905
 * @ifname : The name of the macvtap interface
 * @linkdev: The interface name of the NIC to connect to the external bridge
 * @virtPortProfile: pointer to object holding the virtual port profile data
 *
 * Delete an interface given its name. Disassociate
 * it with the switch if port profile parameters
 * were provided.
 */
906
int virNetDevMacVLanDeleteWithVPortProfile(const char *ifname,
907
                                           const virMacAddr *macaddr,
908 909 910 911
                                           const char *linkdev,
                                           int mode,
                                           virNetDevVPortProfilePtr virtPortProfile,
                                           char *stateDir)
912 913
{
    int ret = 0;
914 915
    int vf = -1;

916
    if (mode == VIR_NETDEV_MACVLAN_MODE_PASSTHRU)
917
        ignore_value(virNetDevRestoreNetConfig(linkdev, vf, stateDir));
918 919 920 921 922 923

    if (ifname) {
        if (virNetDevVPortProfileDisassociate(ifname,
                                              virtPortProfile,
                                              macaddr,
                                              linkdev,
924
                                              vf,
925 926
                                              VIR_NETDEV_VPORT_PROFILE_OP_DESTROY) < 0)
            ret = -1;
927
        if (virNetDevMacVLanDelete(ifname) < 0)
928 929
            ret = -1;
    }
930

931
    virNetlinkEventRemoveClient(0, macaddr, NETLINK_ROUTE);
932

933 934 935
    return ret;
}

936 937 938 939 940 941 942 943 944 945 946 947 948 949 950
/**
 * virNetDevMacVLanRestartWithVPortProfile:
 * Register a port profile callback handler for a VM that
 * is already running
 * .
 * @cr_ifname: Interface name that the macvtap has.
 * @macaddress: The MAC address for the macvtap device
 * @linkdev: The interface name of the NIC to connect to the external bridge
 * @vmuuid: The UUID of the VM the macvtap belongs to
 * @virtPortProfile: pointer to object holding the virtual port profile data
 * @vmOp: Operation to use during setup of the association
 *
 * Returns 0; returns -1 on error.
 */
int virNetDevMacVLanRestartWithVPortProfile(const char *cr_ifname,
951
                                           const virMacAddr *macaddress,
952 953 954
                                           const char *linkdev,
                                           const unsigned char *vmuuid,
                                           virNetDevVPortProfilePtr virtPortProfile,
955
                                           virNetDevVPortProfileOp vmOp)
956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972
{
    int rc = 0;

    rc = virNetDevMacVLanVPortProfileRegisterCallback(cr_ifname, macaddress,
                                                      linkdev, vmuuid,
                                                      virtPortProfile, vmOp);
    if (rc < 0)
        goto error;

    ignore_value(virNetDevVPortProfileAssociate(cr_ifname,
                                                virtPortProfile,
                                                macaddress,
                                                linkdev,
                                                -1,
                                                vmuuid,
                                                vmOp, true));

973
 error:
974 975 976 977
    return rc;

}

978
#else /* ! WITH_MACVTAP */
979 980
/*
 * Stub compiled when macvtap support is not available (!WITH_MACVTAP):
 * reports ENOSYS and always fails with -1. All arguments are ignored.
 */
int
virNetDevMacVLanCreate(const char *ifname ATTRIBUTE_UNUSED,
                       const char *type ATTRIBUTE_UNUSED,
                       const virMacAddr *macaddress ATTRIBUTE_UNUSED,
                       const char *srcdev ATTRIBUTE_UNUSED,
                       uint32_t macvlan_mode ATTRIBUTE_UNUSED,
                       int *retry ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Cannot create macvlan devices on this platform"));

    return -1;
}

/*
 * Stub compiled when macvtap support is not available (!WITH_MACVTAP):
 * reports ENOSYS and always fails with -1. @ifname is ignored.
 *
 * NOTE(review): the message talks about creating devices although this
 * is the delete stub; it is shared verbatim with the other stubs.
 */
int
virNetDevMacVLanDelete(const char *ifname ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Cannot create macvlan devices on this platform"));

    return -1;
}

998
/*
 * Stub compiled when macvtap support is not available (!WITH_MACVTAP):
 * reports ENOSYS and always fails with -1. All arguments are ignored;
 * in particular @res_ifname is never written.
 */
int
virNetDevMacVLanCreateWithVPortProfile(const char *ifname ATTRIBUTE_UNUSED,
                                       const virMacAddr *macaddress ATTRIBUTE_UNUSED,
                                       const char *linkdev ATTRIBUTE_UNUSED,
                                       virNetDevMacVLanMode mode ATTRIBUTE_UNUSED,
                                       int vnet_hdr ATTRIBUTE_UNUSED,
                                       const unsigned char *vmuuid ATTRIBUTE_UNUSED,
                                       virNetDevVPortProfilePtr virtPortProfile ATTRIBUTE_UNUSED,
                                       char **res_ifname ATTRIBUTE_UNUSED,
                                       virNetDevVPortProfileOp vmop ATTRIBUTE_UNUSED,
                                       char *stateDir ATTRIBUTE_UNUSED,
                                       unsigned int unused_flags ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Cannot create macvlan devices on this platform"));

    return -1;
}

1015
/*
 * Stub compiled when macvtap support is not available (!WITH_MACVTAP):
 * reports ENOSYS and always fails with -1. All arguments are ignored.
 */
int
virNetDevMacVLanDeleteWithVPortProfile(const char *ifname ATTRIBUTE_UNUSED,
                                       const virMacAddr *macaddress ATTRIBUTE_UNUSED,
                                       const char *linkdev ATTRIBUTE_UNUSED,
                                       int mode ATTRIBUTE_UNUSED,
                                       virNetDevVPortProfilePtr virtPortProfile ATTRIBUTE_UNUSED,
                                       char *stateDir ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Cannot create macvlan devices on this platform"));

    return -1;
}
1026 1027

/*
 * Stub compiled when macvtap support is not available (!WITH_MACVTAP):
 * reports ENOSYS and always fails with -1. All arguments are ignored.
 */
int
virNetDevMacVLanRestartWithVPortProfile(const char *cr_ifname ATTRIBUTE_UNUSED,
                                        const virMacAddr *macaddress ATTRIBUTE_UNUSED,
                                        const char *linkdev ATTRIBUTE_UNUSED,
                                        const unsigned char *vmuuid ATTRIBUTE_UNUSED,
                                        virNetDevVPortProfilePtr virtPortProfile ATTRIBUTE_UNUSED,
                                        virNetDevVPortProfileOp vmOp ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Cannot create macvlan devices on this platform"));

    return -1;
}
1038 1039

/*
 * Stub compiled when macvtap support is not available (!WITH_MACVTAP):
 * reports ENOSYS and always fails with -1. All arguments are ignored
 * and no callback is registered.
 */
int
virNetDevMacVLanVPortProfileRegisterCallback(const char *ifname ATTRIBUTE_UNUSED,
                                             const virMacAddr *macaddress ATTRIBUTE_UNUSED,
                                             const char *linkdev ATTRIBUTE_UNUSED,
                                             const unsigned char *vmuuid ATTRIBUTE_UNUSED,
                                             virNetDevVPortProfilePtr virtPortProfile ATTRIBUTE_UNUSED,
                                             virNetDevVPortProfileOp vmOp ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Cannot create macvlan devices on this platform"));

    return -1;
}
1050
#endif /* ! WITH_MACVTAP */