tap.c 25.3 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2009 Red Hat, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

P
Paolo Bonzini 已提交
26
#include "tap_int.h"
27 28 29 30 31 32

#include "config-host.h"

#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/wait.h>
A
Alexander Graf 已提交
33
#include <sys/socket.h>
34 35
#include <net/if.h>

P
Paolo Bonzini 已提交
36
#include "net/net.h"
37
#include "clients.h"
38
#include "monitor/monitor.h"
39
#include "sysemu/sysemu.h"
40
#include "qemu-common.h"
41
#include "qemu/error-report.h"
42

P
Paolo Bonzini 已提交
43
#include "net/tap.h"
44

P
Paolo Bonzini 已提交
45
#include "net/vhost_net.h"
46

47
typedef struct TAPState {
48
    NetClientState nc;
49 50 51
    int fd;
    char down_script[1024];
    char down_script_arg[128];
52
    uint8_t buf[NET_BUFSIZE];
53 54 55 56
    bool read_poll;
    bool write_poll;
    bool using_vnet_hdr;
    bool has_ufo;
57
    bool enabled;
58
    VHostNetState *vhost_net;
59
    unsigned host_vnet_hdr_len;
60 61
} TAPState;

62 63
/* Defined further down; needed by tap_cleanup(). */
static void launch_script(const char *setup_script, const char *ifname,
                          int fd, Error **errp);

/* fd handlers installed by tap_update_fd_handler(). */
static void tap_send(void *opaque);
static void tap_writable(void *opaque);

/*
 * (Re)register the fd handlers for @s according to its current
 * read_poll/write_poll/enabled flags.  A handler is only installed when
 * the corresponding poll flag is set and the backend is enabled.
 */
static void tap_update_fd_handler(TAPState *s)
{
    void (*on_readable)(void *) = NULL;
    void (*on_writable)(void *) = NULL;

    if (s->enabled) {
        if (s->read_poll) {
            on_readable = tap_send;
        }
        if (s->write_poll) {
            on_writable = tap_writable;
        }
    }

    qemu_set_fd_handler(s->fd, on_readable, on_writable, s);
}

76
/* Record whether read polling is wanted and refresh the fd handlers. */
static void tap_read_poll(TAPState *s, bool enable)
{
    s->read_poll = enable;

    tap_update_fd_handler(s);
}

82
/* Record whether write polling is wanted and refresh the fd handlers. */
static void tap_write_poll(TAPState *s, bool enable)
{
    s->write_poll = enable;

    tap_update_fd_handler(s);
}

static void tap_writable(void *opaque)
{
    TAPState *s = opaque;

92
    tap_write_poll(s, false);
93

94
    qemu_flush_queued_packets(&s->nc);
95 96 97 98 99 100 101 102 103 104 105
}

/*
 * Write one packet, described by @iov/@iovcnt, to the tap fd.
 *
 * The write is retried while it is interrupted (EINTR).  If the fd would
 * block (EAGAIN), write polling is turned on and 0 is returned; otherwise
 * the result of writev() is returned unchanged.
 */
static ssize_t tap_write_packet(TAPState *s, const struct iovec *iov, int iovcnt)
{
    ssize_t written;

    for (;;) {
        written = writev(s->fd, iov, iovcnt);
        if (written != -1 || errno != EINTR) {
            break;
        }
    }

    if (written == -1 && errno == EAGAIN) {
        tap_write_poll(s, true);
        return 0;
    }

    return written;
}

113
/*
 * .receive_iov callback: write a scatter/gather packet to the tap fd.
 *
 * When the fd carries a vnet header but the peer is not supplying one, a
 * zeroed header of host_vnet_hdr_len bytes is put in front of the caller's
 * vector.
 */
static ssize_t tap_receive_iov(NetClientState *nc, const struct iovec *iov,
                               int iovcnt)
{
    TAPState *s = DO_UPCAST(TAPState, nc, nc);
    struct iovec iov_copy[iovcnt + 1];
    struct virtio_net_hdr_mrg_rxbuf hdr = { };

    if (!s->host_vnet_hdr_len || s->using_vnet_hdr) {
        /* No header to insert: pass the caller's vector straight through. */
        return tap_write_packet(s, iov, iovcnt);
    }

    iov_copy[0].iov_base = &hdr;
    iov_copy[0].iov_len = s->host_vnet_hdr_len;
    memcpy(&iov_copy[1], iov, iovcnt * sizeof(*iov));

    return tap_write_packet(s, iov_copy, iovcnt + 1);
}

132
/*
 * .receive_raw callback: write @buf to the tap fd, preceded by a zeroed
 * vnet header whenever the fd has a vnet header length configured.
 */
static ssize_t tap_receive_raw(NetClientState *nc, const uint8_t *buf, size_t size)
{
    TAPState *s = DO_UPCAST(TAPState, nc, nc);
    struct virtio_net_hdr_mrg_rxbuf hdr = { };
    struct iovec iov[2];
    struct iovec *slot = iov;

    if (s->host_vnet_hdr_len) {
        slot->iov_base = &hdr;
        slot->iov_len  = s->host_vnet_hdr_len;
        slot++;
    }

    slot->iov_base = (char *)buf;
    slot->iov_len  = size;
    slot++;

    return tap_write_packet(s, iov, slot - iov);
}

152
/*
 * .receive callback: write a linear packet to the tap fd.
 *
 * If the fd carries a vnet header that the peer does not supply, the work
 * is delegated to tap_receive_raw(), which inserts it.
 */
static ssize_t tap_receive(NetClientState *nc, const uint8_t *buf, size_t size)
{
    TAPState *s = DO_UPCAST(TAPState, nc, nc);
    struct iovec single;

    if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
        return tap_receive_raw(nc, buf, size);
    }

    single.iov_base = (char *)buf;
    single.iov_len  = size;

    return tap_write_packet(s, &single, 1);
}

167 168
#ifndef __sun__
/*
 * Read one packet from @tapfd into @buf (at most @maxlen bytes) and return
 * the result of read().  Not compiled when __sun__ is defined.
 */
ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen)
{
    ssize_t nread = read(tapfd, buf, maxlen);

    return nread;
}
#endif

174
/*
 * Completion callback handed to qemu_send_packet_async() by tap_send():
 * turn read polling back on.  @len is unused.
 */
static void tap_send_completed(NetClientState *nc, ssize_t len)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);

    tap_read_poll(tap, true);
}

static void tap_send(void *opaque)
{
    TAPState *s = opaque;
    int size;
184
    int packets = 0;
185

F
Fam Zheng 已提交
186
    while (true) {
M
Mark McLoughlin 已提交
187 188 189 190 191 192 193
        uint8_t *buf = s->buf;

        size = tap_read_packet(s->fd, s->buf, sizeof(s->buf));
        if (size <= 0) {
            break;
        }

194 195 196
        if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
            buf  += s->host_vnet_hdr_len;
            size -= s->host_vnet_hdr_len;
M
Mark McLoughlin 已提交
197 198
        }

199
        size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed);
M
Mark McLoughlin 已提交
200
        if (size == 0) {
201
            tap_read_poll(s, false);
S
Stefan Hajnoczi 已提交
202 203 204
            break;
        } else if (size < 0) {
            break;
M
Mark McLoughlin 已提交
205
        }
206 207 208 209 210 211 212 213 214 215 216

        /*
         * When the host keeps receiving more packets while tap_send() is
         * running we can hog the QEMU global mutex.  Limit the number of
         * packets that are processed per tap_send() callback to prevent
         * stalling the guest.
         */
        packets++;
        if (packets >= 50) {
            break;
        }
S
Stefan Hajnoczi 已提交
217
    }
218 219
}

220
/* .has_ufo callback: report the has_ufo flag cached in the TAP state. */
static bool tap_has_ufo(NetClientState *nc)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);

    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);

    return tap->has_ufo;
}

229
/* .has_vnet_hdr callback: true when a non-zero vnet header length is set. */
static bool tap_has_vnet_hdr(NetClientState *nc)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);

    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);

    return tap->host_vnet_hdr_len != 0;
}

238
/*
 * .has_vnet_hdr_len callback: true when tap_probe_vnet_hdr_len() returns
 * non-zero for this fd and @len.
 */
static bool tap_has_vnet_hdr_len(NetClientState *nc, int len)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);

    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);

    return tap_probe_vnet_hdr_len(tap->fd, len) != 0;
}

247
/*
 * .set_vnet_hdr_len callback: set the vnet header length on the tap fd and
 * remember it.  @len must be the size of one of the two virtio-net header
 * layouts.
 */
static void tap_set_vnet_hdr_len(NetClientState *nc, int len)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);

    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
    assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) ||
           len == sizeof(struct virtio_net_hdr));

    tap_fd_set_vnet_hdr_len(tap->fd, len);
    tap->host_vnet_hdr_len = len;
}

259
/*
 * .using_vnet_hdr callback: record whether the peer supplies vnet headers.
 * Asserts that @using_vnet_hdr matches whether a header length is set.
 */
static void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);

    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
    assert((tap->host_vnet_hdr_len != 0) == using_vnet_hdr);

    tap->using_vnet_hdr = using_vnet_hdr;
}

G
Greg Kurz 已提交
269 270 271 272 273 274 275 276 277 278 279 280 281 282
/* .set_vnet_le callback: forward to tap_fd_set_vnet_le() on the tap fd. */
static int tap_set_vnet_le(NetClientState *nc, bool is_le)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);
    int rc = tap_fd_set_vnet_le(tap->fd, is_le);

    return rc;
}

/* .set_vnet_be callback: forward to tap_fd_set_vnet_be() on the tap fd. */
static int tap_set_vnet_be(NetClientState *nc, bool is_be)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);
    int rc = tap_fd_set_vnet_be(tap->fd, is_be);

    return rc;
}

283
/*
 * .set_offload callback: pass the offload flags to tap_fd_set_offload().
 * Does nothing when the fd is negative (tap_cleanup() sets it to -1).
 */
static void tap_set_offload(NetClientState *nc, int csum, int tso4,
                     int tso6, int ecn, int ufo)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);

    if (tap->fd >= 0) {
        tap_fd_set_offload(tap->fd, csum, tso4, tso6, ecn, ufo);
    }
}

294
/*
 * .cleanup callback: tear the TAP client down.
 *
 * Order: release vhost-net, drop queued packets, run the down script (if
 * one was configured; a failure is only reported), stop polling the fd and
 * finally close it.
 */
static void tap_cleanup(NetClientState *nc)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);
    Error *local_err = NULL;

    if (tap->vhost_net) {
        vhost_net_cleanup(tap->vhost_net);
        tap->vhost_net = NULL;
    }

    qemu_purge_queued_packets(nc);

    if (tap->down_script[0]) {
        launch_script(tap->down_script, tap->down_script_arg, tap->fd,
                      &local_err);
        if (local_err) {
            error_report_err(local_err);
        }
    }

    tap_read_poll(tap, false);
    tap_write_poll(tap, false);

    close(tap->fd);
    tap->fd = -1;
}

319
/* .poll callback: switch both read and write polling on or off. */
static void tap_poll(NetClientState *nc, bool enable)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);

    tap_read_poll(tap, enable);
    tap_write_poll(tap, enable);
}

326
/* Return the tap file descriptor of @nc, which must be a TAP client. */
int tap_get_fd(NetClientState *nc)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);

    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);

    return tap->fd;
}

333 334
/* fd support */

335
static NetClientInfo net_tap_info = {
336
    .type = NET_CLIENT_OPTIONS_KIND_TAP,
337 338 339 340
    .size = sizeof(TAPState),
    .receive = tap_receive,
    .receive_raw = tap_receive_raw,
    .receive_iov = tap_receive_iov,
341
    .poll = tap_poll,
342
    .cleanup = tap_cleanup,
343 344 345 346 347 348
    .has_ufo = tap_has_ufo,
    .has_vnet_hdr = tap_has_vnet_hdr,
    .has_vnet_hdr_len = tap_has_vnet_hdr_len,
    .using_vnet_hdr = tap_using_vnet_hdr,
    .set_offload = tap_set_offload,
    .set_vnet_hdr_len = tap_set_vnet_hdr_len,
G
Greg Kurz 已提交
349 350
    .set_vnet_le = tap_set_vnet_le,
    .set_vnet_be = tap_set_vnet_be,
351 352
};

353
/*
 * Create a TAP net client around an already opened tap @fd.
 *
 * @vnet_hdr selects whether the fd is assumed to carry a vnet header
 * (sizeof(struct virtio_net_hdr)) or none.  Offloads are reset, the header
 * length is (re)applied to the fd when the probe allows it, and read
 * polling is started.  Returns the new TAPState.
 */
static TAPState *net_tap_fd_init(NetClientState *peer,
                                 const char *model,
                                 const char *name,
                                 int fd,
                                 int vnet_hdr)
{
    NetClientState *client;
    TAPState *tap;

    client = qemu_new_net_client(&net_tap_info, peer, model, name);
    tap = DO_UPCAST(TAPState, nc, client);

    tap->fd = fd;
    if (vnet_hdr) {
        tap->host_vnet_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        tap->host_vnet_hdr_len = 0;
    }
    tap->using_vnet_hdr = false;
    tap->has_ufo = tap_probe_has_ufo(tap->fd);
    tap->enabled = true;
    tap_set_offload(&tap->nc, 0, 0, 0, 0, 0);

    /*
     * Make sure host header length is set correctly in tap:
     * it might have been modified by another instance of qemu.
     */
    if (tap_probe_vnet_hdr_len(tap->fd, tap->host_vnet_hdr_len)) {
        tap_fd_set_vnet_hdr_len(tap->fd, tap->host_vnet_hdr_len);
    }

    tap_read_poll(tap, true);
    tap->vhost_net = NULL;

    return tap;
}

384 385
/*
 * Run @setup_script with @ifname as its only argument and wait for it.
 *
 * The child closes every descriptor from 3 up to the open-file limit,
 * except @fd, before exec'ing the script.  An error is stored in @errp if
 * fork() fails or if the script does not exit normally with status 0.
 */
static void launch_script(const char *setup_script, const char *ifname,
                          int fd, Error **errp)
{
    int child, status;

    /* try to launch network script */
    child = fork();
    if (child < 0) {
        error_setg_errno(errp, errno, "could not launch network script %s",
                         setup_script);
        return;
    }

    if (child == 0) {
        char *argv[3];
        int limit = sysconf(_SC_OPEN_MAX);
        int n;

        for (n = 3; n < limit; n++) {
            if (n == fd) {
                continue;
            }
            close(n);
        }

        argv[0] = (char *)setup_script;
        argv[1] = (char *)ifname;
        argv[2] = NULL;
        execv(setup_script, argv);
        /* only reached when execv() failed */
        _exit(1);
    }

    while (waitpid(child, &status, 0) != child) {
        /* loop */
    }

    if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
        error_setg(errp, "network script %s failed with status %d",
                   setup_script, status);
    }
}

C
Corey Bryant 已提交
425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458
/*
 * Receive one file descriptor sent as SCM_RIGHTS ancillary data over the
 * UNIX socket @c, together with one byte of ordinary data.
 *
 * Returns the received descriptor, or -1 with errno set.  A recvmsg()
 * failure keeps recvmsg()'s errno.  End of stream, or a message that does
 * not carry a well-formed SCM_RIGHTS control message, is reported as
 * EINVAL instead of handing back a bogus descriptor.
 */
static int recv_fd(int c)
{
    int fd;
    /* The union gives the control buffer cmsghdr alignment. */
    union {
        struct cmsghdr align;
        uint8_t buf[CMSG_SPACE(sizeof(fd))];
    } control;
    struct msghdr msg = {
        .msg_control = control.buf,
        /* room for exactly one descriptor */
        .msg_controllen = CMSG_LEN(sizeof(fd)),
    };
    struct cmsghdr *cmsg;
    struct iovec iov;
    uint8_t req[1];
    ssize_t len;

    memset(&control, 0, sizeof(control));

    iov.iov_base = req;
    iov.iov_len = sizeof(req);

    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;

    len = recvmsg(c, &msg, 0);
    if (len < 0) {
        return -1;
    }

    /* recvmsg() updated msg_controllen; look at what actually arrived. */
    cmsg = CMSG_FIRSTHDR(&msg);
    if (len == 0 || cmsg == NULL ||
        cmsg->cmsg_level != SOL_SOCKET ||
        cmsg->cmsg_type != SCM_RIGHTS ||
        cmsg->cmsg_len < CMSG_LEN(sizeof(fd))) {
        errno = EINVAL;
        return -1;
    }

    memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd));
    return fd;
}

459 460
/*
 * Run the bridge helper and return the tap descriptor it passes back.
 *
 * A socket pair is created; the child keeps one end (advertised to the
 * helper as --fd=N) and execs the helper, the parent receives the tap
 * descriptor on the other end with recv_fd() and then reaps the child.
 * SIGCHLD is blocked for the duration and the previous signal mask is
 * restored on every return path.
 *
 * @helper is either a plain executable path, or, if it contains a space or
 * tab, a command line that is run through "/bin/sh -c".
 *
 * Returns the descriptor on success.  On failure returns -1 with @errp
 * set; no descriptor is left open in that case.
 */
static int net_bridge_run_helper(const char *helper, const char *bridge,
                                 Error **errp)
{
    sigset_t oldmask, mask;
    int pid, status;
    char *args[5];
    char **parg;
    int sv[2];

    sigemptyset(&mask);
    sigaddset(&mask, SIGCHLD);
    sigprocmask(SIG_BLOCK, &mask, &oldmask);

    if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
        error_setg_errno(errp, errno, "socketpair() failed");
        sigprocmask(SIG_SETMASK, &oldmask, NULL);
        return -1;
    }

    /* try to launch bridge helper */
    pid = fork();
    if (pid < 0) {
        error_setg_errno(errp, errno, "Can't fork bridge helper");
        close(sv[0]);
        close(sv[1]);
        sigprocmask(SIG_SETMASK, &oldmask, NULL);
        return -1;
    }
    if (pid == 0) {
        int open_max = sysconf(_SC_OPEN_MAX), i;
        char fd_buf[6+10];
        char br_buf[6+IFNAMSIZ] = {0};
        char helper_cmd[PATH_MAX + sizeof(fd_buf) + sizeof(br_buf) + 15];

        /* Keep only stdio and the helper's end of the socket pair. */
        for (i = 3; i < open_max; i++) {
            if (i != sv[1]) {
                close(i);
            }
        }

        snprintf(fd_buf, sizeof(fd_buf), "%s%d", "--fd=", sv[1]);

        if (strrchr(helper, ' ') || strrchr(helper, '\t')) {
            /* assume helper is a command */

            if (strstr(helper, "--br=") == NULL) {
                snprintf(br_buf, sizeof(br_buf), "%s%s", "--br=", bridge);
            }

            snprintf(helper_cmd, sizeof(helper_cmd), "%s %s %s %s",
                     helper, "--use-vnet", fd_buf, br_buf);

            parg = args;
            *parg++ = (char *)"sh";
            *parg++ = (char *)"-c";
            *parg++ = helper_cmd;
            *parg++ = NULL;

            execv("/bin/sh", args);
        } else {
            /* assume helper is just the executable path name */

            snprintf(br_buf, sizeof(br_buf), "%s%s", "--br=", bridge);

            parg = args;
            *parg++ = (char *)helper;
            *parg++ = (char *)"--use-vnet";
            *parg++ = fd_buf;
            *parg++ = br_buf;
            *parg++ = NULL;

            execv(helper, args);
        }
        /* only reached when execv() failed */
        _exit(1);

    } else {
        int fd;
        int saved_errno;

        close(sv[1]);

        do {
            fd = recv_fd(sv[0]);
        } while (fd == -1 && errno == EINTR);
        /* close()/waitpid() below may clobber errno */
        saved_errno = errno;

        close(sv[0]);

        while (waitpid(pid, &status, 0) != pid) {
            /* loop */
        }
        sigprocmask(SIG_SETMASK, &oldmask, NULL);

        /*
         * A failed helper is the root cause, so report it first.  Release
         * whatever descriptor it may have sent before failing; 0 is not
         * closed because recv_fd() may yield 0 for end of stream.
         */
        if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
            if (fd > 0) {
                close(fd);
            }
            error_setg(errp, "bridge helper failed");
            return -1;
        }
        if (fd < 0) {
            error_setg_errno(errp, saved_errno,
                             "failed to recv file descriptor");
            return -1;
        }
        return fd;
    }
}

560
int net_init_bridge(const NetClientOptions *opts, const char *name,
561
                    NetClientState *peer, Error **errp)
C
Corey Bryant 已提交
562
{
563 564
    const NetdevBridgeOptions *bridge;
    const char *helper, *br;
C
Corey Bryant 已提交
565 566 567
    TAPState *s;
    int fd, vnet_hdr;

E
Eric Blake 已提交
568 569
    assert(opts->type == NET_CLIENT_OPTIONS_KIND_BRIDGE);
    bridge = opts->u.bridge;
570 571 572

    helper = bridge->has_helper ? bridge->helper : DEFAULT_BRIDGE_HELPER;
    br     = bridge->has_br     ? bridge->br     : DEFAULT_BRIDGE_INTERFACE;
C
Corey Bryant 已提交
573

574
    fd = net_bridge_run_helper(helper, br, errp);
C
Corey Bryant 已提交
575 576 577 578 579 580
    if (fd == -1) {
        return -1;
    }

    fcntl(fd, F_SETFL, O_NONBLOCK);
    vnet_hdr = tap_probe_vnet_hdr(fd);
581
    s = net_tap_fd_init(peer, "bridge", name, fd, vnet_hdr);
C
Corey Bryant 已提交
582

583 584
    snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s,br=%s", helper,
             br);
C
Corey Bryant 已提交
585 586 587 588

    return 0;
}

589 590
/*
 * Open a tap device and, if requested, run its setup script.
 *
 * *@vnet_hdr is set to the user's vnet_hdr= value when given (and then
 * treated as required), otherwise to 1 and not required.  The script is
 * skipped when @setup_script is NULL, empty or "no".
 *
 * Returns the tap fd, or -1 on failure (the fd is closed again when the
 * script fails).
 */
static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr,
                        const char *setup_script, char *ifname,
                        size_t ifname_sz, int mq_required, Error **errp)
{
    Error *local_err = NULL;
    int fd, vnet_hdr_required;

    *vnet_hdr = tap->has_vnet_hdr ? tap->vnet_hdr : 1;
    vnet_hdr_required = tap->has_vnet_hdr ? *vnet_hdr : 0;

    TFR(fd = tap_open(ifname, ifname_sz, vnet_hdr, vnet_hdr_required,
                      mq_required, errp));
    if (fd < 0) {
        return -1;
    }

    if (!setup_script ||
        setup_script[0] == '\0' ||
        strcmp(setup_script, "no") == 0) {
        return fd;
    }

    launch_script(setup_script, ifname, fd, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        close(fd);
        return -1;
    }

    return fd;
}

J
Jason Wang 已提交
624 625
#define MAX_TAP_QUEUES 1024

626 627 628 629 630
/*
 * Create one TAP client (one queue) on top of @fd and finish its setup:
 * apply the sndbuf option, fill in the info string, remember the down
 * script for the ifname/script case, and attach vhost-net when requested.
 *
 * vhost is used when vhost= says so; without vhost=, it is used when a
 * vhost fd name was given or vhostforce= is set.  Errors are reported
 * through @errp.
 */
static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
                             const char *model, const char *name,
                             const char *ifname, const char *script,
                             const char *downscript, const char *vhostfdname,
                             int vnet_hdr, int fd, Error **errp)
{
    Error *local_err = NULL;
    TAPState *s = net_tap_fd_init(peer, model, name, fd, vnet_hdr);
    VhostNetOptions options;
    bool want_vhost;
    int vhostfd;

    tap_set_sndbuf(s->fd, tap, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        return;
    }

    if (tap->has_fd || tap->has_fds) {
        snprintf(s->nc.info_str, sizeof(s->nc.info_str), "fd=%d", fd);
    } else if (tap->has_helper) {
        snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s",
                 tap->helper);
    } else {
        snprintf(s->nc.info_str, sizeof(s->nc.info_str),
                 "ifname=%s,script=%s,downscript=%s", ifname, script,
                 downscript);

        if (strcmp(downscript, "no") != 0) {
            snprintf(s->down_script, sizeof(s->down_script), "%s", downscript);
            snprintf(s->down_script_arg, sizeof(s->down_script_arg),
                     "%s", ifname);
        }
    }

    if (tap->has_vhost) {
        want_vhost = tap->vhost;
    } else {
        want_vhost = vhostfdname || (tap->has_vhostforce && tap->vhostforce);
    }

    if (!want_vhost) {
        if (tap->has_vhostfd || tap->has_vhostfds) {
            error_setg(errp, "vhostfd= is not valid without vhost");
        }
        return;
    }

    options.backend_type = VHOST_BACKEND_TYPE_KERNEL;
    options.net_backend = &s->nc;

    if (tap->has_vhostfd || tap->has_vhostfds) {
        vhostfd = monitor_fd_param(cur_mon, vhostfdname, &local_err);
        if (vhostfd == -1) {
            error_propagate(errp, local_err);
            return;
        }
    } else {
        vhostfd = open("/dev/vhost-net", O_RDWR);
        if (vhostfd < 0) {
            error_setg_errno(errp, errno,
                             "tap: open vhost char device failed");
            return;
        }
    }
    /* the descriptor travels to vhost_net_init() inside the opaque pointer */
    options.opaque = (void *)(uintptr_t)vhostfd;

    s->vhost_net = vhost_net_init(&options);
    if (!s->vhost_net) {
        error_setg(errp,
                   "vhost-net requested but could not be initialized");
        return;
    }
}

J
Jason Wang 已提交
693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718
/*
 * Split the ':'-separated list @str into newly allocated strings stored in
 * @fds, at most @max of them.  Returns the number of entries stored.
 *
 * An empty @str, or the empty remainder after a trailing ':', produces no
 * entry.  Each stored string is allocated with g_strdup()/g_strndup().
 */
static int get_fds(char *str, char *fds[], int max)
{
    const char *end = str + strlen(str);
    char *cursor = str;
    int count = 0;

    while (count < max && cursor < end) {
        char *sep = strchr(cursor, ':');

        if (sep == NULL) {
            /* last element: take the rest of the string */
            fds[count++] = g_strdup(cursor);
            break;
        }

        fds[count++] = g_strndup(cursor, sep - cursor);
        cursor = sep + 1;
    }

    return count;
}

719
int net_init_tap(const NetClientOptions *opts, const char *name,
720
                 NetClientState *peer, Error **errp)
721
{
722
    const NetdevTapOptions *tap;
J
Jason Wang 已提交
723
    int fd, vnet_hdr = 0, i = 0, queues;
724 725
    /* for the no-fd, no-helper case */
    const char *script = NULL; /* suppress wrong "uninit'd use" gcc warning */
726
    const char *downscript = NULL;
727
    Error *err = NULL;
J
Jason Wang 已提交
728
    const char *vhostfdname;
729 730
    char ifname[128];

E
Eric Blake 已提交
731 732
    assert(opts->type == NET_CLIENT_OPTIONS_KIND_TAP);
    tap = opts->u.tap;
J
Jason Wang 已提交
733 734
    queues = tap->has_queues ? tap->queues : 1;
    vhostfdname = tap->has_vhostfd ? tap->vhostfd : NULL;
735

736 737 738
    /* QEMU vlans does not support multiqueue tap, in this case peer is set.
     * For -netdev, peer is always NULL. */
    if (peer && (tap->has_queues || tap->has_fds || tap->has_vhostfds)) {
739
        error_setg(errp, "Multiqueue tap cannot be used with QEMU vlans");
740 741 742
        return -1;
    }

743 744
    if (tap->has_fd) {
        if (tap->has_ifname || tap->has_script || tap->has_downscript ||
J
Jason Wang 已提交
745
            tap->has_vnet_hdr || tap->has_helper || tap->has_queues ||
746
            tap->has_fds || tap->has_vhostfds) {
747 748 749
            error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, "
                       "helper=, queues=, fds=, and vhostfds= "
                       "are invalid with fd=");
750 751 752
            return -1;
        }

753
        fd = monitor_fd_param(cur_mon, tap->fd, &err);
754
        if (fd == -1) {
755
            error_propagate(errp, err);
756 757 758 759 760 761
            return -1;
        }

        fcntl(fd, F_SETFL, O_NONBLOCK);

        vnet_hdr = tap_probe_vnet_hdr(fd);
C
Corey Bryant 已提交
762

763 764 765 766
        net_init_tap_one(tap, peer, "tap", name, NULL,
                         script, downscript,
                         vhostfdname, vnet_hdr, fd, &err);
        if (err) {
767
            error_propagate(errp, err);
J
Jason Wang 已提交
768 769 770 771 772 773 774 775 776
            return -1;
        }
    } else if (tap->has_fds) {
        char *fds[MAX_TAP_QUEUES];
        char *vhost_fds[MAX_TAP_QUEUES];
        int nfds, nvhosts;

        if (tap->has_ifname || tap->has_script || tap->has_downscript ||
            tap->has_vnet_hdr || tap->has_helper || tap->has_queues ||
777
            tap->has_vhostfd) {
778 779 780
            error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, "
                       "helper=, queues=, and vhostfd= "
                       "are invalid with fds=");
J
Jason Wang 已提交
781 782 783 784 785 786 787
            return -1;
        }

        nfds = get_fds(tap->fds, fds, MAX_TAP_QUEUES);
        if (tap->has_vhostfds) {
            nvhosts = get_fds(tap->vhostfds, vhost_fds, MAX_TAP_QUEUES);
            if (nfds != nvhosts) {
788 789
                error_setg(errp, "The number of fds passed does not match "
                           "the number of vhostfds passed");
J
Jason Wang 已提交
790 791 792 793 794
                return -1;
            }
        }

        for (i = 0; i < nfds; i++) {
795
            fd = monitor_fd_param(cur_mon, fds[i], &err);
J
Jason Wang 已提交
796
            if (fd == -1) {
797
                error_propagate(errp, err);
J
Jason Wang 已提交
798 799 800 801
                return -1;
            }

            fcntl(fd, F_SETFL, O_NONBLOCK);
C
Corey Bryant 已提交
802

J
Jason Wang 已提交
803 804 805
            if (i == 0) {
                vnet_hdr = tap_probe_vnet_hdr(fd);
            } else if (vnet_hdr != tap_probe_vnet_hdr(fd)) {
806 807
                error_setg(errp,
                           "vnet_hdr not consistent across given tap fds");
J
Jason Wang 已提交
808 809 810
                return -1;
            }

811 812 813 814 815
            net_init_tap_one(tap, peer, "tap", name, ifname,
                             script, downscript,
                             tap->has_vhostfds ? vhost_fds[i] : NULL,
                             vnet_hdr, fd, &err);
            if (err) {
816
                error_propagate(errp, err);
J
Jason Wang 已提交
817 818 819
                return -1;
            }
        }
820 821
    } else if (tap->has_helper) {
        if (tap->has_ifname || tap->has_script || tap->has_downscript ||
822
            tap->has_vnet_hdr || tap->has_queues || tap->has_vhostfds) {
823 824
            error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, "
                       "queues=, and vhostfds= are invalid with helper=");
C
Corey Bryant 已提交
825 826 827
            return -1;
        }

828 829
        fd = net_bridge_run_helper(tap->helper, DEFAULT_BRIDGE_INTERFACE,
                                   errp);
C
Corey Bryant 已提交
830 831 832 833 834 835 836
        if (fd == -1) {
            return -1;
        }

        fcntl(fd, F_SETFL, O_NONBLOCK);
        vnet_hdr = tap_probe_vnet_hdr(fd);

837 838 839 840
        net_init_tap_one(tap, peer, "bridge", name, ifname,
                         script, downscript, vhostfdname,
                         vnet_hdr, fd, &err);
        if (err) {
841
            error_propagate(errp, err);
842
            close(fd);
J
Jason Wang 已提交
843 844
            return -1;
        }
845
    } else {
846
        if (tap->has_vhostfds) {
847
            error_setg(errp, "vhostfds= is invalid if fds= wasn't specified");
848 849
            return -1;
        }
850
        script = tap->has_script ? tap->script : DEFAULT_NETWORK_SCRIPT;
851 852
        downscript = tap->has_downscript ? tap->downscript :
            DEFAULT_NETWORK_DOWN_SCRIPT;
J
Jason Wang 已提交
853 854 855 856 857

        if (tap->has_ifname) {
            pstrcpy(ifname, sizeof ifname, tap->ifname);
        } else {
            ifname[0] = '\0';
858
        }
C
Corey Bryant 已提交
859

J
Jason Wang 已提交
860 861
        for (i = 0; i < queues; i++) {
            fd = net_tap_init(tap, &vnet_hdr, i >= 1 ? "no" : script,
862
                              ifname, sizeof ifname, queues > 1, errp);
J
Jason Wang 已提交
863 864 865 866 867 868
            if (fd == -1) {
                return -1;
            }

            if (queues > 1 && i == 0 && !tap->has_ifname) {
                if (tap_fd_get_ifname(fd, ifname)) {
869
                    error_setg(errp, "Fail to get ifname");
870
                    close(fd);
J
Jason Wang 已提交
871 872 873 874
                    return -1;
                }
            }

875 876 877 878 879
            net_init_tap_one(tap, peer, "tap", name, ifname,
                             i >= 1 ? "no" : script,
                             i >= 1 ? "no" : downscript,
                             vhostfdname, vnet_hdr, fd, &err);
            if (err) {
880
                error_propagate(errp, err);
881
                close(fd);
J
Jason Wang 已提交
882 883 884
                return -1;
            }
        }
885 886
    }

J
Jason Wang 已提交
887
    return 0;
888
}
889

890
/*
 * Return the vhost-net state attached to the TAP client @nc, or NULL when
 * none is attached.  @nc must be a TAP client.
 */
VHostNetState *tap_get_vhost_net(NetClientState *nc)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);

    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);

    return tap->vhost_net;
}
896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930

/*
 * Enable a TAP client.  Does nothing and returns 0 if it is already
 * enabled; otherwise returns the result of tap_fd_enable() and, when that
 * is 0, marks the client enabled and reinstalls its fd handlers.
 */
int tap_enable(NetClientState *nc)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);
    int ret;

    if (tap->enabled) {
        return 0;
    }

    ret = tap_fd_enable(tap->fd);
    if (ret == 0) {
        tap->enabled = true;
        tap_update_fd_handler(tap);
    }

    return ret;
}

/*
 * Disable a TAP client.  Does nothing and returns 0 if it is already
 * disabled; otherwise returns the result of tap_fd_disable() and, when
 * that is 0, drops queued packets, marks the client disabled and removes
 * its fd handlers.
 */
int tap_disable(NetClientState *nc)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);
    int ret;

    if (!tap->enabled) {
        return 0;
    }

    ret = tap_fd_disable(tap->fd);
    if (ret == 0) {
        qemu_purge_queued_packets(nc);
        tap->enabled = false;
        tap_update_fd_handler(tap);
    }

    return ret;
}