virnetdevtap.c 10.7 KB
Newer Older
1
/*
2
 * Copyright (C) 2007-2012 Red Hat, Inc.
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
 *
 * Authors:
 *     Mark McLoughlin <markmc@redhat.com>
 *     Daniel P. Berrange <berrange@redhat.com>
 */

#include <config.h>

25
#include "virmacaddr.h"
26 27 28
#include "virnetdevtap.h"
#include "virnetdev.h"
#include "virnetdevbridge.h"
A
Ansis Atteka 已提交
29
#include "virnetdevopenvswitch.h"
30 31 32 33 34
#include "virterror_internal.h"
#include "virfile.h"
#include "virterror_internal.h"
#include "memory.h"
#include "logging.h"
35
#include "util.h"
36 37 38 39 40 41 42 43 44 45 46 47

#include <sys/ioctl.h>
#ifdef HAVE_NET_IF_H
# include <net/if.h>
#endif
#include <fcntl.h>
#ifdef __linux__
# include <linux/if_tun.h>    /* IFF_TUN, IFF_NO_PI */
#endif

#define VIR_FROM_THIS VIR_FROM_NONE

48 49 50 51
/* Report an error from this file, tagged with the location of the
 * macro's expansion site (__FILE__, __FUNCTION__, __LINE__). */
#define virNetDevTapError(code, ...)                            \
    virReportErrorHelper(VIR_FROM_THIS, code,                   \
                         __FILE__, __FUNCTION__, __LINE__,      \
                         __VA_ARGS__)

52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113
/**
 * virNetDevProbeVnetHdr:
 * @tapfd: a tun/tap file descriptor
 *
 * Check whether it is safe to enable the IFF_VNET_HDR flag on the
 * tap interface.
 *
 * Setting IFF_VNET_HDR enables QEMU's virtio_net driver to allow
 * guests to pass larger (GSO) packets, with partial checksums, to
 * the host. This greatly increases the achievable throughput.
 *
 * It is only useful to enable this when we're setting up a virtio
 * interface. And it is only *safe* to enable it when we know for
 * sure that a) qemu has support for IFF_VNET_HDR and b) the running
 * kernel implements the TUNGETIFF ioctl(), which qemu needs to query
 * the supplied tapfd.
 *
 * Returns 1 if VnetHdr is supported, 0 if not supported
 */
#ifdef IFF_VNET_HDR
static int
virNetDevProbeVnetHdr(int tapfd)
{
# if defined(IFF_VNET_HDR) && defined(TUNGETFEATURES) && defined(TUNGETIFF)
    unsigned int features;
    struct ifreq dummy;

    if (ioctl(tapfd, TUNGETFEATURES, &features) != 0) {
        VIR_INFO("Not enabling IFF_VNET_HDR; "
                 "TUNGETFEATURES ioctl() not implemented");
        return 0;
    }

    if (!(features & IFF_VNET_HDR)) {
        VIR_INFO("Not enabling IFF_VNET_HDR; "
                 "TUNGETFEATURES ioctl() reports no IFF_VNET_HDR");
        return 0;
    }

    /* The kernel will always return -1 at this point.
     * If TUNGETIFF is not implemented then errno == EBADFD.
     */
    if (ioctl(tapfd, TUNGETIFF, &dummy) != -1 || errno != EBADFD) {
        VIR_INFO("Not enabling IFF_VNET_HDR; "
                 "TUNGETIFF ioctl() not implemented");
        return 0;
    }

    VIR_INFO("Enabling IFF_VNET_HDR");

    return 1;
# else
    (void) tapfd;
    VIR_INFO("Not enabling IFF_VNET_HDR; disabled at build time");
    return 0;
# endif
}
#endif


#ifdef TUNSETIFF
/**
114
 * virNetDevTapCreate:
115 116
 * @ifname: the interface name
 * @tapfd: file descriptor return value for the new tap device
117 118 119 120
 * @flags: OR of virNetDevTapCreateFlags. Only one flag is recognized:
 *
 *   VIR_NETDEV_TAP_CREATE_VNET_HDR
 *     - Enable IFF_VNET_HDR on the tap device
121 122 123 124
 *
 * Creates a tap interface.
 * If the @tapfd parameter is supplied, the open tap device file
 * descriptor will be returned, otherwise the TAP device will be made
125 126
 * persistent and closed. The caller must use virNetDevTapDelete to
 * remove a persistent TAP devices when it is no longer needed.
127 128 129 130
 *
 * Returns 0 in case of success or an errno code in case of failure.
 */
int virNetDevTapCreate(char **ifname,
131
                       int *tapfd,
132
                       unsigned int flags ATTRIBUTE_UNUSED)
133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148
{
    int fd;
    struct ifreq ifr;
    int ret = -1;

    if ((fd = open("/dev/net/tun", O_RDWR)) < 0) {
        virReportSystemError(errno, "%s",
                             _("Unable to open /dev/net/tun, is tun module loaded?"));
        return -1;
    }

    memset(&ifr, 0, sizeof(ifr));

    ifr.ifr_flags = IFF_TAP|IFF_NO_PI;

# ifdef IFF_VNET_HDR
149 150
    if ((flags &  VIR_NETDEV_TAP_CREATE_VNET_HDR) &&
        virNetDevProbeVnetHdr(fd))
151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238
        ifr.ifr_flags |= IFF_VNET_HDR;
# endif

    if (virStrcpyStatic(ifr.ifr_name, *ifname) == NULL) {
        virReportSystemError(ERANGE,
                             _("Network interface name '%s' is too long"),
                             *ifname);
        goto cleanup;

    }

    if (ioctl(fd, TUNSETIFF, &ifr) < 0) {
        virReportSystemError(errno,
                             _("Unable to create tap device %s"),
                             NULLSTR(*ifname));
        goto cleanup;
    }

    if (!tapfd &&
        (errno = ioctl(fd, TUNSETPERSIST, 1))) {
        virReportSystemError(errno,
                             _("Unable to set tap device %s to persistent"),
                             NULLSTR(*ifname));
        goto cleanup;
    }

    VIR_FREE(*ifname);
    if (!(*ifname = strdup(ifr.ifr_name))) {
        virReportOOMError();
        goto cleanup;
    }
    if (tapfd)
        *tapfd = fd;
    else
        VIR_FORCE_CLOSE(fd);

    ret = 0;

cleanup:
    if (ret < 0)
        VIR_FORCE_CLOSE(fd);

    return ret;
}


int virNetDevTapDelete(const char *ifname)
{
    struct ifreq try;
    int fd;
    int ret = -1;

    if ((fd = open("/dev/net/tun", O_RDWR)) < 0) {
        virReportSystemError(errno, "%s",
                             _("Unable to open /dev/net/tun, is tun module loaded?"));
        return -1;
    }

    memset(&try, 0, sizeof(struct ifreq));
    try.ifr_flags = IFF_TAP|IFF_NO_PI;

    if (virStrcpyStatic(try.ifr_name, ifname) == NULL) {
        virReportSystemError(ERANGE,
                             _("Network interface name '%s' is too long"),
                             ifname);
        goto cleanup;
    }

    if (ioctl(fd, TUNSETIFF, &try) < 0) {
        virReportSystemError(errno, "%s",
                             _("Unable to associate TAP device"));
        goto cleanup;
    }

    if (ioctl(fd, TUNSETPERSIST, 0) < 0) {
        virReportSystemError(errno, "%s",
                             _("Unable to make TAP device non-persistent"));
        goto cleanup;
    }

    ret = 0;

cleanup:
    VIR_FORCE_CLOSE(fd);
    return ret;
}
#else /* ! TUNSETIFF */
/* Fallback used when TUNSETIFF is not defined: always reports ENOSYS
 * and returns -1; no argument is used. */
int
virNetDevTapCreate(char **ifname ATTRIBUTE_UNUSED,
                   int *tapfd ATTRIBUTE_UNUSED,
                   unsigned int flags ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Unable to create TAP devices on this platform"));
    return -1;
}
/* Fallback used when TUNSETIFF is not defined: always reports ENOSYS
 * and returns -1; @ifname is not used. */
int
virNetDevTapDelete(const char *ifname ATTRIBUTE_UNUSED)
{
    virReportSystemError(ENOSYS, "%s",
                         _("Unable to delete TAP devices on this platform"));
    return -1;
}
#endif /* ! TUNSETIFF */


/**
 * virNetDevTapCreateInBridgePort:
 * @brname: the bridge name
 * @ifname: the interface name (or name template)
 * @macaddr: desired MAC address (VIR_MAC_BUFLEN long)
 * @tapfd: file descriptor return value for the new tap device
A
Ansis Atteka 已提交
261
 * @virtPortProfile: bridge/port specific configuration
262 263 264 265 266 267 268 269
 * @flags: OR of virNetDevTapCreateFlags:

 *   VIR_NETDEV_TAP_CREATE_IFUP
 *     - Bring the interface up
 *   VIR_NETDEV_TAP_CREATE_VNET_HDR
 *     - Enable IFF_VNET_HDR on the tap device
 *   VIR_NETDEV_TAP_CREATE_USE_MAC_FOR_BRIDGE
 *     - Set this interface's MAC as the bridge's MAC address
270 271 272 273 274 275
 *
 * This function creates a new tap device on a bridge. @ifname can be either
 * a fixed name or a name template with '%d' for dynamic name allocation.
 * in either case the final name for the bridge will be stored in @ifname.
 * If the @tapfd parameter is supplied, the open tap device file
 * descriptor will be returned, otherwise the TAP device will be made
276
 * persistent and closed. The caller must use virNetDevTapDelete to remove
277 278 279 280 281 282 283
 * a persistent TAP devices when it is no longer needed.
 *
 * Returns 0 in case of success or -1 on failure
 */
int virNetDevTapCreateInBridgePort(const char *brname,
                                   char **ifname,
                                   const unsigned char *macaddr,
284
                                   const unsigned char *vmuuid,
A
Ansis Atteka 已提交
285
                                   int *tapfd,
286 287
                                   virNetDevVPortProfilePtr virtPortProfile,
                                   unsigned int flags)
288
{
289 290
    unsigned char tapmac[VIR_MAC_BUFLEN];

291
    if (virNetDevTapCreate(ifname, tapfd, flags) < 0)
292 293 294 295 296 297 298 299
        return -1;

    /* We need to set the interface MAC before adding it
     * to the bridge, because the bridge assumes the lowest
     * MAC of all enslaved interfaces & we don't want it
     * seeing the kernel allocate random MAC for the TAP
     * device before we set our static MAC.
     */
300
    memcpy(tapmac, macaddr, VIR_MAC_BUFLEN);
301 302 303 304 305 306 307 308 309 310 311 312 313 314
    if (!(flags & VIR_NETDEV_TAP_CREATE_USE_MAC_FOR_BRIDGE)) {
        if (macaddr[0] == 0xFE) {
            /* For normal use, the tap device's MAC address cannot
             * match the MAC address used by the guest. This results
             * in "received packet on vnetX with own address as source
             * address" error logs from the kernel.
             */
            virNetDevTapError(VIR_ERR_CONFIG_UNSUPPORTED,
                              "Unable to use MAC address starting with "
                              "reserved value 0xFE - '%02X:%02X:%02X:%02X:%02X:%02X' - ",
                              macaddr[0], macaddr[1], macaddr[2],
                              macaddr[3], macaddr[4], macaddr[5]);
            goto error;
        }
315
        tapmac[0] = 0xFE; /* Discourage bridge from using TAP dev MAC */
316
    }
317 318

    if (virNetDevSetMAC(*ifname, tapmac) < 0)
319 320 321 322 323 324 325 326 327
        goto error;

    /* We need to set the interface MTU before adding it
     * to the bridge, because the bridge will have its
     * MTU adjusted automatically when we add the new interface.
     */
    if (virNetDevSetMTUFromDevice(*ifname, brname) < 0)
        goto error;

A
Ansis Atteka 已提交
328
    if (virtPortProfile) {
329
        if (virNetDevOpenvswitchAddPort(brname, *ifname, macaddr, vmuuid,
A
Ansis Atteka 已提交
330 331 332 333 334 335 336
                                        virtPortProfile) < 0) {
            goto error;
        }
    } else {
        if (virNetDevBridgeAddPort(brname, *ifname) < 0)
            goto error;
    }
337

338
    if (virNetDevSetOnline(*ifname, !!(flags & VIR_NETDEV_TAP_CREATE_IFUP)) < 0)
339 340 341 342 343 344 345 346 347
        goto error;

    return 0;

 error:
    VIR_FORCE_CLOSE(*tapfd);

    return errno;
}