tap.c 25.2 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2009 Red Hat, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

P
Paolo Bonzini 已提交
26
#include "tap_int.h"
27 28 29 30 31 32

#include "config-host.h"

#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/wait.h>
A
Alexander Graf 已提交
33
#include <sys/socket.h>
34 35
#include <net/if.h>

P
Paolo Bonzini 已提交
36
#include "net/net.h"
37
#include "clients.h"
38
#include "monitor/monitor.h"
39
#include "sysemu/sysemu.h"
40
#include "qemu-common.h"
41
#include "qemu/error-report.h"
42

P
Paolo Bonzini 已提交
43
#include "net/tap.h"
44

P
Paolo Bonzini 已提交
45
#include "net/vhost_net.h"
46

47
typedef struct TAPState {
48
    NetClientState nc;
49 50 51
    int fd;
    char down_script[1024];
    char down_script_arg[128];
52
    uint8_t buf[NET_BUFSIZE];
53 54 55 56
    bool read_poll;
    bool write_poll;
    bool using_vnet_hdr;
    bool has_ufo;
57
    bool enabled;
58
    VHostNetState *vhost_net;
59
    unsigned host_vnet_hdr_len;
60 61
} TAPState;

62 63
/* Defined further down; tap_cleanup() needs it before its definition. */
static void launch_script(const char *setup_script, const char *ifname,
                          int fd, Error **errp);

/* fd-handler callbacks registered by tap_update_fd_handler(). */
static int tap_can_send(void *opaque);
static void tap_send(void *opaque);
static void tap_writable(void *opaque);

/*
 * (Re)install the fd handlers for s->fd according to the current
 * read_poll/write_poll flags.  While the queue is disabled no handler
 * is installed at all.
 */
static void tap_update_fd_handler(TAPState *s)
{
    bool want_read = s->read_poll && s->enabled;
    bool want_write = s->write_poll && s->enabled;

    qemu_set_fd_handler2(s->fd,
                         want_read ? tap_can_send : NULL,
                         want_read ? tap_send : NULL,
                         want_write ? tap_writable : NULL,
                         s);
}

78
/* Record whether read readiness on the tap fd is wanted and apply it. */
static void tap_read_poll(TAPState *s, bool enable)
{
    s->read_poll = enable;

    tap_update_fd_handler(s);
}

84
/* Record whether write readiness on the tap fd is wanted and apply it. */
static void tap_write_poll(TAPState *s, bool enable)
{
    s->write_poll = enable;

    tap_update_fd_handler(s);
}

static void tap_writable(void *opaque)
{
    TAPState *s = opaque;

94
    tap_write_poll(s, false);
95

96
    qemu_flush_queued_packets(&s->nc);
97 98 99 100 101 102 103 104 105 106 107
}

/*
 * Write one packet, described by iov[0..iovcnt), to the tap fd.
 *
 * The write is retried while it is interrupted (EINTR).  On EAGAIN write
 * polling is switched on and 0 is returned; otherwise the writev() result
 * is returned unchanged.
 */
static ssize_t tap_write_packet(TAPState *s, const struct iovec *iov, int iovcnt)
{
    ssize_t written;

    for (;;) {
        written = writev(s->fd, iov, iovcnt);
        if (written != -1 || errno != EINTR) {
            break;
        }
    }

    if (written != -1 || errno != EAGAIN) {
        return written;
    }

    tap_write_poll(s, true);
    return 0;
}

115
/*
 * Send a scatter-gather packet to the tap device.
 *
 * When the host side has a vnet header but the peer does not supply one,
 * a zeroed header of host_vnet_hdr_len bytes is placed in front of the
 * caller's vectors.
 */
static ssize_t tap_receive_iov(NetClientState *nc, const struct iovec *iov,
                               int iovcnt)
{
    TAPState *s = DO_UPCAST(TAPState, nc, nc);
    struct iovec with_hdr[iovcnt + 1];
    struct virtio_net_hdr_mrg_rxbuf hdr = { };

    if (!s->host_vnet_hdr_len || s->using_vnet_hdr) {
        return tap_write_packet(s, iov, iovcnt);
    }

    with_hdr[0].iov_base = &hdr;
    with_hdr[0].iov_len = s->host_vnet_hdr_len;
    memcpy(&with_hdr[1], iov, iovcnt * sizeof(*iov));

    return tap_write_packet(s, with_hdr, iovcnt + 1);
}

134
/*
 * Send a raw packet to the tap device, placing a zeroed vnet header of
 * host_vnet_hdr_len bytes in front of it whenever the host side has one.
 */
static ssize_t tap_receive_raw(NetClientState *nc, const uint8_t *buf, size_t size)
{
    TAPState *s = DO_UPCAST(TAPState, nc, nc);
    struct virtio_net_hdr_mrg_rxbuf hdr = { };
    struct iovec vec[2];
    struct iovec *payload = vec;

    if (s->host_vnet_hdr_len) {
        vec[0].iov_base = &hdr;
        vec[0].iov_len = s->host_vnet_hdr_len;
        payload++;
    }

    payload->iov_base = (char *)buf;
    payload->iov_len = size;

    return tap_write_packet(s, vec, (int)(payload - vec) + 1);
}

154
/*
 * Send a linear packet to the tap device.  If a host vnet header is
 * needed and the peer does not provide it, tap_receive_raw() is used so
 * that the header gets added.
 */
static ssize_t tap_receive(NetClientState *nc, const uint8_t *buf, size_t size)
{
    TAPState *s = DO_UPCAST(TAPState, nc, nc);
    struct iovec vec = {
        .iov_base = (char *)buf,
        .iov_len = size,
    };

    if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
        return tap_receive_raw(nc, buf, size);
    }

    return tap_write_packet(s, &vec, 1);
}

static int tap_can_send(void *opaque)
{
    TAPState *s = opaque;

173
    return qemu_can_send_packet(&s->nc);
174 175
}

176 177
#ifndef __sun__
/* Read one packet of at most maxlen bytes from the tap fd into buf. */
ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen)
{
    ssize_t got = read(tapfd, buf, maxlen);

    return got;
}
#endif

183
/*
 * Completion callback passed to qemu_send_packet_async(): switches read
 * polling on the tap fd back on.
 */
static void tap_send_completed(NetClientState *nc, ssize_t len)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);

    tap_read_poll(tap, true);
}

static void tap_send(void *opaque)
{
    TAPState *s = opaque;
    int size;
193
    int packets = 0;
194

S
Stefan Hajnoczi 已提交
195
    while (qemu_can_send_packet(&s->nc)) {
M
Mark McLoughlin 已提交
196 197 198 199 200 201 202
        uint8_t *buf = s->buf;

        size = tap_read_packet(s->fd, s->buf, sizeof(s->buf));
        if (size <= 0) {
            break;
        }

203 204 205
        if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
            buf  += s->host_vnet_hdr_len;
            size -= s->host_vnet_hdr_len;
M
Mark McLoughlin 已提交
206 207
        }

208
        size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed);
M
Mark McLoughlin 已提交
209
        if (size == 0) {
210
            tap_read_poll(s, false);
S
Stefan Hajnoczi 已提交
211 212 213
            break;
        } else if (size < 0) {
            break;
M
Mark McLoughlin 已提交
214
        }
215 216 217 218 219 220 221 222 223 224 225

        /*
         * When the host keeps receiving more packets while tap_send() is
         * running we can hog the QEMU global mutex.  Limit the number of
         * packets that are processed per tap_send() callback to prevent
         * stalling the guest.
         */
        packets++;
        if (packets >= 50) {
            break;
        }
S
Stefan Hajnoczi 已提交
226
    }
227 228
}

229
/* Return the UFO capability that was probed when the client was created. */
static bool tap_has_ufo(NetClientState *nc)
{
    TAPState *tap;

    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
    tap = DO_UPCAST(TAPState, nc, nc);

    return tap->has_ufo;
}

238
/* Report whether the host side of this tap uses a vnet header. */
static bool tap_has_vnet_hdr(NetClientState *nc)
{
    TAPState *tap;

    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
    tap = DO_UPCAST(TAPState, nc, nc);

    return tap->host_vnet_hdr_len != 0;
}

247
/* Report the result of probing the tap fd for a vnet header of len bytes. */
static bool tap_has_vnet_hdr_len(NetClientState *nc, int len)
{
    TAPState *tap;

    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
    tap = DO_UPCAST(TAPState, nc, nc);

    return tap_probe_vnet_hdr_len(tap->fd, len) != 0;
}

256
/*
 * Set the host vnet header length on the tap fd and remember it.  Only
 * the sizes of struct virtio_net_hdr and struct virtio_net_hdr_mrg_rxbuf
 * are accepted.
 */
static void tap_set_vnet_hdr_len(NetClientState *nc, int len)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);

    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
    assert(len == sizeof(struct virtio_net_hdr_mrg_rxbuf) ||
           len == sizeof(struct virtio_net_hdr));

    tap_fd_set_vnet_hdr_len(tap->fd, len);
    tap->host_vnet_hdr_len = len;
}

268
/*
 * Record whether the peer handles the vnet header itself.  The value must
 * agree with whether the host side has a vnet header at all.
 */
static void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);

    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
    assert((tap->host_vnet_hdr_len != 0) == using_vnet_hdr);

    tap->using_vnet_hdr = using_vnet_hdr;
}

278
/*
 * Forward the offload settings to the tap fd.  Does nothing once the fd
 * has been closed (fd < 0).
 */
static void tap_set_offload(NetClientState *nc, int csum, int tso4,
                     int tso6, int ecn, int ufo)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);

    if (tap->fd >= 0) {
        tap_fd_set_offload(tap->fd, csum, tso4, tso6, ecn, ufo);
    }
}

289
/*
 * Tear down a tap client: release vhost-net, drop queued packets, run the
 * down script if one was configured (reporting any error it produces),
 * remove the fd handlers and close the tap fd.
 */
static void tap_cleanup(NetClientState *nc)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);
    Error *local_err = NULL;

    if (tap->vhost_net != NULL) {
        vhost_net_cleanup(tap->vhost_net);
        tap->vhost_net = NULL;
    }

    qemu_purge_queued_packets(nc);

    if (tap->down_script[0] != '\0') {
        launch_script(tap->down_script, tap->down_script_arg, tap->fd,
                      &local_err);
        if (local_err != NULL) {
            error_report_err(local_err);
        }
    }

    tap_read_poll(tap, false);
    tap_write_poll(tap, false);

    close(tap->fd);
    tap->fd = -1;
}

314
/* Switch both read and write polling of the tap fd on or off. */
static void tap_poll(NetClientState *nc, bool enable)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);

    tap_read_poll(tap, enable);
    tap_write_poll(tap, enable);
}

321
/* Return the tap file descriptor behind a tap net client. */
int tap_get_fd(NetClientState *nc)
{
    TAPState *tap;

    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
    tap = DO_UPCAST(TAPState, nc, nc);

    return tap->fd;
}

328 329
/* fd support */

330
static NetClientInfo net_tap_info = {
331
    .type = NET_CLIENT_OPTIONS_KIND_TAP,
332 333 334 335
    .size = sizeof(TAPState),
    .receive = tap_receive,
    .receive_raw = tap_receive_raw,
    .receive_iov = tap_receive_iov,
336
    .poll = tap_poll,
337
    .cleanup = tap_cleanup,
338 339 340 341 342 343
    .has_ufo = tap_has_ufo,
    .has_vnet_hdr = tap_has_vnet_hdr,
    .has_vnet_hdr_len = tap_has_vnet_hdr_len,
    .using_vnet_hdr = tap_using_vnet_hdr,
    .set_offload = tap_set_offload,
    .set_vnet_hdr_len = tap_set_vnet_hdr_len,
344 345
};

346
/*
 * Create a tap net client around an already opened tap fd.
 *
 * vnet_hdr selects whether the host side starts with a
 * struct virtio_net_hdr sized header.  All offloads are switched off,
 * read polling is enabled and no vhost-net instance is attached.
 */
static TAPState *net_tap_fd_init(NetClientState *peer,
                                 const char *model,
                                 const char *name,
                                 int fd,
                                 int vnet_hdr)
{
    NetClientState *client = qemu_new_net_client(&net_tap_info, peer,
                                                 model, name);
    TAPState *tap = DO_UPCAST(TAPState, nc, client);

    tap->fd = fd;
    if (vnet_hdr) {
        tap->host_vnet_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        tap->host_vnet_hdr_len = 0;
    }
    tap->using_vnet_hdr = false;
    tap->has_ufo = tap_probe_has_ufo(tap->fd);
    tap->enabled = true;
    tap_set_offload(&tap->nc, 0, 0, 0, 0, 0);

    /*
     * Make sure host header length is set correctly in tap:
     * it might have been modified by another instance of qemu.
     */
    if (tap_probe_vnet_hdr_len(tap->fd, tap->host_vnet_hdr_len)) {
        tap_fd_set_vnet_hdr_len(tap->fd, tap->host_vnet_hdr_len);
    }

    tap_read_poll(tap, true);
    tap->vhost_net = NULL;

    return tap;
}

377 378
/*
 * Run setup_script with ifname as its only argument and wait for it.
 *
 * The child closes every descriptor from 3 upwards except fd before
 * exec'ing the script.  An error is stored in errp when fork() fails or
 * when the script does not exit normally with status 0.
 */
static void launch_script(const char *setup_script, const char *ifname,
                          int fd, Error **errp)
{
    int child, wstatus;

    /* try to launch network script */
    child = fork();
    if (child < 0) {
        error_setg_errno(errp, errno, "could not launch network script %s",
                         setup_script);
        return;
    }

    if (child == 0) {
        char *argv[] = { (char *)setup_script, (char *)ifname, NULL };
        int limit = sysconf(_SC_OPEN_MAX);
        int n;

        for (n = 3; n < limit; n++) {
            if (n == fd) {
                continue;
            }
            close(n);
        }

        execv(setup_script, argv);
        _exit(1);
    }

    while (waitpid(child, &wstatus, 0) != child) {
        /* loop */
    }

    if (!WIFEXITED(wstatus) || WEXITSTATUS(wstatus) != 0) {
        error_setg(errp, "network script %s failed with status %d",
                   setup_script, wstatus);
    }
}

C
Corey Bryant 已提交
418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451
/*
 * Receive one file descriptor sent as SCM_RIGHTS ancillary data over the
 * UNIX socket c.
 *
 * Returns the received descriptor (>= 0) on success and -1 on failure.
 * On a recvmsg() error errno is left as set by recvmsg().  If the peer
 * closed the socket, or sent data without a descriptor attached, errno is
 * set to EPROTO so that callers retrying on EINTR never act on a stale
 * errno value.
 */
static int recv_fd(int c)
{
    int fd;
    uint8_t msgbuf[CMSG_SPACE(sizeof(fd))];
    struct msghdr msg = {
        .msg_control = msgbuf,
        /* Offer room for exactly one descriptor. */
        .msg_controllen = CMSG_LEN(sizeof(fd)),
    };
    struct cmsghdr *cmsg;
    struct iovec iov;
    uint8_t req[1];
    ssize_t len;

    memset(msgbuf, 0, sizeof(msgbuf));

    iov.iov_base = req;
    iov.iov_len = sizeof(req);

    msg.msg_iov = &iov;
    msg.msg_iovlen = 1;

    len = recvmsg(c, &msg, 0);
    if (len < 0) {
        return -1;
    }
    if (len == 0) {
        /* Peer closed the socket without sending anything. */
        errno = EPROTO;
        return -1;
    }

    /* Only trust the control buffer if it really carries a descriptor. */
    cmsg = CMSG_FIRSTHDR(&msg);
    if (cmsg == NULL ||
        cmsg->cmsg_level != SOL_SOCKET ||
        cmsg->cmsg_type != SCM_RIGHTS ||
        cmsg->cmsg_len < CMSG_LEN(sizeof(fd))) {
        errno = EPROTO;
        return -1;
    }

    memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd));
    return fd;
}

452 453
/*
 * Run the bridge helper and return the tap fd it passes back.
 *
 * helper is either a plain executable path or, if it contains a space or
 * tab, a command line that is run through "/bin/sh -c".  The helper is
 * given "--use-vnet", "--fd=<socket>" and "--br=<bridge>" (the latter is
 * omitted for command lines that already contain "--br=").
 *
 * SIGCHLD is blocked while the helper runs and the previous signal mask
 * is restored on every return path.  Returns the received fd on success;
 * on failure returns -1 and sets errp.
 */
static int net_bridge_run_helper(const char *helper, const char *bridge,
                                 Error **errp)
{
    sigset_t oldmask, mask;
    int pid, status;
    char *args[5];
    char **parg;
    int sv[2];

    sigemptyset(&mask);
    sigaddset(&mask, SIGCHLD);
    sigprocmask(SIG_BLOCK, &mask, &oldmask);

    if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
        error_setg_errno(errp, errno, "socketpair() failed");
        /* Do not leave SIGCHLD blocked for the rest of the process. */
        sigprocmask(SIG_SETMASK, &oldmask, NULL);
        return -1;
    }

    /* try to launch bridge helper */
    pid = fork();
    if (pid < 0) {
        error_setg_errno(errp, errno, "Can't fork bridge helper");
        /* No child was created: release both ends and the signal mask. */
        close(sv[0]);
        close(sv[1]);
        sigprocmask(SIG_SETMASK, &oldmask, NULL);
        return -1;
    }
    if (pid == 0) {
        int open_max = sysconf(_SC_OPEN_MAX), i;
        char fd_buf[6+10];
        char br_buf[6+IFNAMSIZ] = {0};
        char helper_cmd[PATH_MAX + sizeof(fd_buf) + sizeof(br_buf) + 15];

        /* Keep only stdio and the socket used to pass the fd back. */
        for (i = 3; i < open_max; i++) {
            if (i != sv[1]) {
                close(i);
            }
        }

        snprintf(fd_buf, sizeof(fd_buf), "%s%d", "--fd=", sv[1]);

        if (strrchr(helper, ' ') || strrchr(helper, '\t')) {
            /* assume helper is a command */

            if (strstr(helper, "--br=") == NULL) {
                snprintf(br_buf, sizeof(br_buf), "%s%s", "--br=", bridge);
            }

            snprintf(helper_cmd, sizeof(helper_cmd), "%s %s %s %s",
                     helper, "--use-vnet", fd_buf, br_buf);

            parg = args;
            *parg++ = (char *)"sh";
            *parg++ = (char *)"-c";
            *parg++ = helper_cmd;
            *parg++ = NULL;

            execv("/bin/sh", args);
        } else {
            /* assume helper is just the executable path name */

            snprintf(br_buf, sizeof(br_buf), "%s%s", "--br=", bridge);

            parg = args;
            *parg++ = (char *)helper;
            *parg++ = (char *)"--use-vnet";
            *parg++ = fd_buf;
            *parg++ = br_buf;
            *parg++ = NULL;

            execv(helper, args);
        }
        /* Only reached when exec failed. */
        _exit(1);

    } else {
        int fd;
        int saved_errno;

        close(sv[1]);

        do {
            fd = recv_fd(sv[0]);
        } while (fd == -1 && errno == EINTR);
        /* close() and waitpid() below may overwrite errno. */
        saved_errno = errno;

        close(sv[0]);

        while (waitpid(pid, &status, 0) != pid) {
            /* loop */
        }
        sigprocmask(SIG_SETMASK, &oldmask, NULL);
        if (fd < 0) {
            error_setg_errno(errp, saved_errno,
                             "failed to recv file descriptor");
            return -1;
        }
        if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
            error_setg(errp, "bridge helper failed");
            return -1;
        }
        return fd;
    }
}

553
int net_init_bridge(const NetClientOptions *opts, const char *name,
554
                    NetClientState *peer, Error **errp)
C
Corey Bryant 已提交
555
{
556 557
    const NetdevBridgeOptions *bridge;
    const char *helper, *br;
C
Corey Bryant 已提交
558 559 560
    TAPState *s;
    int fd, vnet_hdr;

561 562 563 564 565
    assert(opts->kind == NET_CLIENT_OPTIONS_KIND_BRIDGE);
    bridge = opts->bridge;

    helper = bridge->has_helper ? bridge->helper : DEFAULT_BRIDGE_HELPER;
    br     = bridge->has_br     ? bridge->br     : DEFAULT_BRIDGE_INTERFACE;
C
Corey Bryant 已提交
566

567
    fd = net_bridge_run_helper(helper, br, errp);
C
Corey Bryant 已提交
568 569 570 571 572 573
    if (fd == -1) {
        return -1;
    }

    fcntl(fd, F_SETFL, O_NONBLOCK);
    vnet_hdr = tap_probe_vnet_hdr(fd);
574
    s = net_tap_fd_init(peer, "bridge", name, fd, vnet_hdr);
C
Corey Bryant 已提交
575

576 577
    snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s,br=%s", helper,
             br);
C
Corey Bryant 已提交
578 579 580 581

    return 0;
}

582 583
/*
 * Open a tap device and run its setup script.
 *
 * *vnet_hdr is set from the vnet_hdr= option when given (and is then
 * passed to tap_open() as required), otherwise it is set to 1 and not
 * required.  The script is skipped when it is NULL, empty or "no".
 *
 * Returns the tap fd, or -1 with errp set.
 */
static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr,
                        const char *setup_script, char *ifname,
                        size_t ifname_sz, int mq_required, Error **errp)
{
    Error *err = NULL;
    int fd;
    int vnet_hdr_required = 0;

    *vnet_hdr = 1;
    if (tap->has_vnet_hdr) {
        *vnet_hdr = tap->vnet_hdr;
        vnet_hdr_required = *vnet_hdr;
    }

    TFR(fd = tap_open(ifname, ifname_sz, vnet_hdr, vnet_hdr_required,
                      mq_required, errp));
    if (fd < 0) {
        return -1;
    }

    if (!setup_script ||
        setup_script[0] == '\0' ||
        strcmp(setup_script, "no") == 0) {
        return fd;
    }

    launch_script(setup_script, ifname, fd, &err);
    if (err) {
        error_propagate(errp, err);
        close(fd);
        return -1;
    }

    return fd;
}

J
Jason Wang 已提交
617 618
#define MAX_TAP_QUEUES 1024

619 620 621 622 623
/*
 * Create one tap queue around fd: build the client, apply the send buffer
 * setting, fill in the info string (and the down script for the
 * ifname/script case) and, when requested, attach vhost-net.
 *
 * vhost is used when vhost= says so or, if vhost= is absent, when a vhost
 * fd name was given or vhostforce is set.  Errors are reported via errp.
 */
static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
                             const char *model, const char *name,
                             const char *ifname, const char *script,
                             const char *downscript, const char *vhostfdname,
                             int vnet_hdr, int fd, Error **errp)
{
    Error *err = NULL;
    TAPState *s = net_tap_fd_init(peer, model, name, fd, vnet_hdr);
    bool force_vhost = tap->has_vhostforce && tap->vhostforce;
    bool vhostfd_given = tap->has_vhostfd || tap->has_vhostfds;
    bool want_vhost;
    VhostNetOptions options;
    int vhostfd;

    tap_set_sndbuf(s->fd, tap, &err);
    if (err) {
        error_propagate(errp, err);
        return;
    }

    if (tap->has_fd || tap->has_fds) {
        snprintf(s->nc.info_str, sizeof(s->nc.info_str), "fd=%d", fd);
    } else if (tap->has_helper) {
        snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s",
                 tap->helper);
    } else {
        snprintf(s->nc.info_str, sizeof(s->nc.info_str),
                 "ifname=%s,script=%s,downscript=%s", ifname, script,
                 downscript);

        /* Remember the down script so tap_cleanup() can run it. */
        if (strcmp(downscript, "no") != 0) {
            snprintf(s->down_script, sizeof(s->down_script), "%s", downscript);
            snprintf(s->down_script_arg, sizeof(s->down_script_arg),
                     "%s", ifname);
        }
    }

    if (tap->has_vhost) {
        want_vhost = tap->vhost;
    } else {
        want_vhost = vhostfdname || force_vhost;
    }

    if (!want_vhost) {
        if (vhostfd_given) {
            error_setg(errp, "vhostfd= is not valid without vhost");
        }
        return;
    }

    options.backend_type = VHOST_BACKEND_TYPE_KERNEL;
    options.net_backend = &s->nc;
    options.force = force_vhost;

    if (vhostfd_given) {
        vhostfd = monitor_fd_param(cur_mon, vhostfdname, &err);
        if (vhostfd == -1) {
            error_propagate(errp, err);
            return;
        }
    } else {
        vhostfd = open("/dev/vhost-net", O_RDWR);
        if (vhostfd < 0) {
            error_setg_errno(errp, errno,
                             "tap: open vhost char device failed");
            return;
        }
    }
    options.opaque = (void *)(uintptr_t)vhostfd;

    s->vhost_net = vhost_net_init(&options);
    if (!s->vhost_net) {
        error_setg(errp,
                   "vhost-net requested but could not be initialized");
    }
}

J
Jason Wang 已提交
687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712
/*
 * Split the ':'-separated list in str into newly allocated strings stored
 * in fds[], at most max of them.  Returns the number of entries stored.
 */
static int get_fds(char *str, char *fds[], int max)
{
    const char *end = str + strlen(str);
    char *cur = str;
    int count = 0;

    while (count < max && cur < end) {
        char *sep = strchr(cur, ':');

        if (sep == NULL) {
            /* Last element: take the rest of the string. */
            fds[count++] = g_strdup(cur);
            break;
        }

        fds[count++] = g_strndup(cur, sep - cur);
        cur = sep + 1;
    }

    return count;
}

713
int net_init_tap(const NetClientOptions *opts, const char *name,
714
                 NetClientState *peer, Error **errp)
715
{
716
    const NetdevTapOptions *tap;
J
Jason Wang 已提交
717
    int fd, vnet_hdr = 0, i = 0, queues;
718 719
    /* for the no-fd, no-helper case */
    const char *script = NULL; /* suppress wrong "uninit'd use" gcc warning */
720
    const char *downscript = NULL;
721
    Error *err = NULL;
J
Jason Wang 已提交
722
    const char *vhostfdname;
723 724 725 726
    char ifname[128];

    assert(opts->kind == NET_CLIENT_OPTIONS_KIND_TAP);
    tap = opts->tap;
J
Jason Wang 已提交
727 728
    queues = tap->has_queues ? tap->queues : 1;
    vhostfdname = tap->has_vhostfd ? tap->vhostfd : NULL;
729

730 731 732
    /* QEMU vlans does not support multiqueue tap, in this case peer is set.
     * For -netdev, peer is always NULL. */
    if (peer && (tap->has_queues || tap->has_fds || tap->has_vhostfds)) {
733
        error_setg(errp, "Multiqueue tap cannot be used with QEMU vlans");
734 735 736
        return -1;
    }

737 738
    if (tap->has_fd) {
        if (tap->has_ifname || tap->has_script || tap->has_downscript ||
J
Jason Wang 已提交
739
            tap->has_vnet_hdr || tap->has_helper || tap->has_queues ||
740
            tap->has_fds || tap->has_vhostfds) {
741 742 743
            error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, "
                       "helper=, queues=, fds=, and vhostfds= "
                       "are invalid with fd=");
744 745 746
            return -1;
        }

747
        fd = monitor_fd_param(cur_mon, tap->fd, &err);
748
        if (fd == -1) {
749
            error_propagate(errp, err);
750 751 752 753 754 755
            return -1;
        }

        fcntl(fd, F_SETFL, O_NONBLOCK);

        vnet_hdr = tap_probe_vnet_hdr(fd);
C
Corey Bryant 已提交
756

757 758 759 760
        net_init_tap_one(tap, peer, "tap", name, NULL,
                         script, downscript,
                         vhostfdname, vnet_hdr, fd, &err);
        if (err) {
761
            error_propagate(errp, err);
J
Jason Wang 已提交
762 763 764 765 766 767 768 769 770
            return -1;
        }
    } else if (tap->has_fds) {
        char *fds[MAX_TAP_QUEUES];
        char *vhost_fds[MAX_TAP_QUEUES];
        int nfds, nvhosts;

        if (tap->has_ifname || tap->has_script || tap->has_downscript ||
            tap->has_vnet_hdr || tap->has_helper || tap->has_queues ||
771
            tap->has_vhostfd) {
772 773 774
            error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, "
                       "helper=, queues=, and vhostfd= "
                       "are invalid with fds=");
J
Jason Wang 已提交
775 776 777 778 779 780 781
            return -1;
        }

        nfds = get_fds(tap->fds, fds, MAX_TAP_QUEUES);
        if (tap->has_vhostfds) {
            nvhosts = get_fds(tap->vhostfds, vhost_fds, MAX_TAP_QUEUES);
            if (nfds != nvhosts) {
782 783
                error_setg(errp, "The number of fds passed does not match "
                           "the number of vhostfds passed");
J
Jason Wang 已提交
784 785 786 787 788
                return -1;
            }
        }

        for (i = 0; i < nfds; i++) {
789
            fd = monitor_fd_param(cur_mon, fds[i], &err);
J
Jason Wang 已提交
790
            if (fd == -1) {
791
                error_propagate(errp, err);
J
Jason Wang 已提交
792 793 794 795
                return -1;
            }

            fcntl(fd, F_SETFL, O_NONBLOCK);
C
Corey Bryant 已提交
796

J
Jason Wang 已提交
797 798 799
            if (i == 0) {
                vnet_hdr = tap_probe_vnet_hdr(fd);
            } else if (vnet_hdr != tap_probe_vnet_hdr(fd)) {
800 801
                error_setg(errp,
                           "vnet_hdr not consistent across given tap fds");
J
Jason Wang 已提交
802 803 804
                return -1;
            }

805 806 807 808 809
            net_init_tap_one(tap, peer, "tap", name, ifname,
                             script, downscript,
                             tap->has_vhostfds ? vhost_fds[i] : NULL,
                             vnet_hdr, fd, &err);
            if (err) {
810
                error_propagate(errp, err);
J
Jason Wang 已提交
811 812 813
                return -1;
            }
        }
814 815
    } else if (tap->has_helper) {
        if (tap->has_ifname || tap->has_script || tap->has_downscript ||
816
            tap->has_vnet_hdr || tap->has_queues || tap->has_vhostfds) {
817 818
            error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, "
                       "queues=, and vhostfds= are invalid with helper=");
C
Corey Bryant 已提交
819 820 821
            return -1;
        }

822 823
        fd = net_bridge_run_helper(tap->helper, DEFAULT_BRIDGE_INTERFACE,
                                   errp);
C
Corey Bryant 已提交
824 825 826 827 828 829 830
        if (fd == -1) {
            return -1;
        }

        fcntl(fd, F_SETFL, O_NONBLOCK);
        vnet_hdr = tap_probe_vnet_hdr(fd);

831 832 833 834
        net_init_tap_one(tap, peer, "bridge", name, ifname,
                         script, downscript, vhostfdname,
                         vnet_hdr, fd, &err);
        if (err) {
835
            error_propagate(errp, err);
836
            close(fd);
J
Jason Wang 已提交
837 838
            return -1;
        }
839
    } else {
840
        if (tap->has_vhostfds) {
841
            error_setg(errp, "vhostfds= is invalid if fds= wasn't specified");
842 843
            return -1;
        }
844
        script = tap->has_script ? tap->script : DEFAULT_NETWORK_SCRIPT;
845 846
        downscript = tap->has_downscript ? tap->downscript :
            DEFAULT_NETWORK_DOWN_SCRIPT;
J
Jason Wang 已提交
847 848 849 850 851

        if (tap->has_ifname) {
            pstrcpy(ifname, sizeof ifname, tap->ifname);
        } else {
            ifname[0] = '\0';
852
        }
C
Corey Bryant 已提交
853

J
Jason Wang 已提交
854 855
        for (i = 0; i < queues; i++) {
            fd = net_tap_init(tap, &vnet_hdr, i >= 1 ? "no" : script,
856
                              ifname, sizeof ifname, queues > 1, errp);
J
Jason Wang 已提交
857 858 859 860 861 862
            if (fd == -1) {
                return -1;
            }

            if (queues > 1 && i == 0 && !tap->has_ifname) {
                if (tap_fd_get_ifname(fd, ifname)) {
863
                    error_setg(errp, "Fail to get ifname");
864
                    close(fd);
J
Jason Wang 已提交
865 866 867 868
                    return -1;
                }
            }

869 870 871 872 873
            net_init_tap_one(tap, peer, "tap", name, ifname,
                             i >= 1 ? "no" : script,
                             i >= 1 ? "no" : downscript,
                             vhostfdname, vnet_hdr, fd, &err);
            if (err) {
874
                error_propagate(errp, err);
875
                close(fd);
J
Jason Wang 已提交
876 877 878
                return -1;
            }
        }
879 880
    }

J
Jason Wang 已提交
881
    return 0;
882
}
883

884
/* Return the vhost-net instance attached to a tap client, or NULL. */
VHostNetState *tap_get_vhost_net(NetClientState *nc)
{
    TAPState *tap;

    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
    tap = DO_UPCAST(TAPState, nc, nc);

    return tap->vhost_net;
}
890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924

/*
 * Enable a tap queue.  Returns 0 if it already was enabled; otherwise the
 * result of tap_fd_enable().  The fd handlers are reinstalled only when
 * enabling succeeded.
 */
int tap_enable(NetClientState *nc)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);
    int rc;

    if (tap->enabled) {
        return 0;
    }

    rc = tap_fd_enable(tap->fd);
    if (rc != 0) {
        return rc;
    }

    tap->enabled = true;
    tap_update_fd_handler(tap);
    return rc;
}

/*
 * Disable a tap queue.  Returns 0 if it already was disabled; otherwise
 * the result of tap_fd_disable().  On success queued packets are dropped
 * and the fd handlers are removed.
 */
int tap_disable(NetClientState *nc)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);
    int rc;

    if (!tap->enabled) {
        return 0;
    }

    rc = tap_fd_disable(tap->fd);
    if (rc != 0) {
        return rc;
    }

    qemu_purge_queued_packets(nc);
    tap->enabled = false;
    tap_update_fd_handler(tap);
    return rc;
}