tap.c 25.3 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2009 Red Hat, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

P
Peter Maydell 已提交
26
#include "qemu/osdep.h"
P
Paolo Bonzini 已提交
27
#include "tap_int.h"
28 29 30 31


#include <sys/ioctl.h>
#include <sys/wait.h>
A
Alexander Graf 已提交
32
#include <sys/socket.h>
33 34
#include <net/if.h>

P
Paolo Bonzini 已提交
35
#include "net/net.h"
36
#include "clients.h"
37
#include "monitor/monitor.h"
38
#include "sysemu/sysemu.h"
39
#include "qemu-common.h"
40
#include "qemu/error-report.h"
41

P
Paolo Bonzini 已提交
42
#include "net/tap.h"
43

P
Paolo Bonzini 已提交
44
#include "net/vhost_net.h"
45

46
typedef struct TAPState {
47
    NetClientState nc;
48 49 50
    int fd;
    char down_script[1024];
    char down_script_arg[128];
51
    uint8_t buf[NET_BUFSIZE];
52 53 54 55
    bool read_poll;
    bool write_poll;
    bool using_vnet_hdr;
    bool has_ufo;
56
    bool enabled;
57
    VHostNetState *vhost_net;
58
    unsigned host_vnet_hdr_len;
59 60
} TAPState;

61 62
static void launch_script(const char *setup_script, const char *ifname,
                          int fd, Error **errp);
63 64 65 66 67 68

static void tap_send(void *opaque);
static void tap_writable(void *opaque);

/*
 * Re-register the fd handlers so that they reflect the current
 * read_poll / write_poll / enabled flags.  A handler is only installed
 * when its poll flag is set and the queue is enabled; NULL is passed
 * otherwise.
 */
static void tap_update_fd_handler(TAPState *s)
{
    void (*read_cb)(void *) = NULL;
    void (*write_cb)(void *) = NULL;

    if (s->enabled) {
        if (s->read_poll) {
            read_cb = tap_send;
        }
        if (s->write_poll) {
            write_cb = tap_writable;
        }
    }

    qemu_set_fd_handler(s->fd, read_cb, write_cb, s);
}

75
/* Set the read-side poll flag and refresh the installed fd handlers. */
static void tap_read_poll(TAPState *s, bool enable)
{
    s->read_poll = enable;

    tap_update_fd_handler(s);
}

81
/* Set the write-side poll flag and refresh the installed fd handlers. */
static void tap_write_poll(TAPState *s, bool enable)
{
    s->write_poll = enable;

    tap_update_fd_handler(s);
}

static void tap_writable(void *opaque)
{
    TAPState *s = opaque;

91
    tap_write_poll(s, false);
92

93
    qemu_flush_queued_packets(&s->nc);
94 95 96 97 98 99 100 101 102 103 104
}

/*
 * Write one packet, described by an iovec array, to the tap fd.
 *
 * writev() is retried while it fails with EINTR.  On EAGAIN write
 * polling is switched on and 0 is returned; any other outcome returns
 * writev()'s own result (-1 on error).
 */
static ssize_t tap_write_packet(TAPState *s, const struct iovec *iov, int iovcnt)
{
    ssize_t written;

    for (;;) {
        written = writev(s->fd, iov, iovcnt);
        if (written != -1 || errno != EINTR) {
            break;
        }
    }

    if (written != -1) {
        return written;
    }

    if (errno == EAGAIN) {
        /* tap_writable() will flush the queue once the fd drains */
        tap_write_poll(s, true);
        return 0;
    }

    return written;
}

112
/*
 * .receive_iov callback: write a scatter/gather packet to the tap fd.
 *
 * If the tap fd carries a vnet header (host_vnet_hdr_len != 0) but
 * using_vnet_hdr is false, a zero-filled header of host_vnet_hdr_len
 * bytes is put in front of the caller's iovec.
 */
static ssize_t tap_receive_iov(NetClientState *nc, const struct iovec *iov,
                               int iovcnt)
{
    TAPState *s = DO_UPCAST(TAPState, nc, nc);
    struct iovec with_hdr[iovcnt + 1];
    struct virtio_net_hdr_mrg_rxbuf zero_hdr = { };

    if (!s->host_vnet_hdr_len || s->using_vnet_hdr) {
        /* nothing to prepend: pass the caller's vector through untouched */
        return tap_write_packet(s, iov, iovcnt);
    }

    with_hdr[0].iov_base = &zero_hdr;
    with_hdr[0].iov_len = s->host_vnet_hdr_len;
    memcpy(&with_hdr[1], iov, iovcnt * sizeof(*iov));

    return tap_write_packet(s, with_hdr, iovcnt + 1);
}

131
/*
 * .receive_raw callback: write a flat buffer to the tap fd, preceded by
 * a zero-filled header of host_vnet_hdr_len bytes whenever that length
 * is non-zero (using_vnet_hdr is not consulted here).
 */
static ssize_t tap_receive_raw(NetClientState *nc, const uint8_t *buf, size_t size)
{
    TAPState *s = DO_UPCAST(TAPState, nc, nc);
    struct virtio_net_hdr_mrg_rxbuf zero_hdr = { };
    struct iovec vec[2];
    struct iovec *next = vec;

    if (s->host_vnet_hdr_len) {
        next->iov_base = &zero_hdr;
        next->iov_len = s->host_vnet_hdr_len;
        next++;
    }

    next->iov_base = (char *)buf;
    next->iov_len = size;
    next++;

    return tap_write_packet(s, vec, next - vec);
}

151
/*
 * .receive callback: write a flat buffer to the tap fd.  When a vnet
 * header has to be synthesised (host_vnet_hdr_len set, using_vnet_hdr
 * clear) the work is delegated to tap_receive_raw().
 */
static ssize_t tap_receive(NetClientState *nc, const uint8_t *buf, size_t size)
{
    TAPState *s = DO_UPCAST(TAPState, nc, nc);
    struct iovec whole;

    if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
        return tap_receive_raw(nc, buf, size);
    }

    whole.iov_base = (char *)buf;
    whole.iov_len = size;

    return tap_write_packet(s, &whole, 1);
}

166 167
#ifndef __sun__
/*
 * Read one packet from the tap fd into buf (at most maxlen bytes).
 *
 * Returns read()'s result: the number of bytes read, 0 on end of file,
 * or -1 with errno set.  The call is retried when interrupted by a
 * signal (EINTR), matching what tap_write_packet() does for writev().
 */
ssize_t tap_read_packet(int tapfd, uint8_t *buf, int maxlen)
{
    ssize_t len;

    do {
        len = read(tapfd, buf, maxlen);
    } while (len == -1 && errno == EINTR);

    return len;
}
#endif

173
/*
 * Completion callback handed to qemu_send_packet_async() by tap_send():
 * turns read polling back on.  The len argument is not used.
 */
static void tap_send_completed(NetClientState *nc, ssize_t len)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);

    tap_read_poll(tap, true);
}

static void tap_send(void *opaque)
{
    TAPState *s = opaque;
    int size;
183
    int packets = 0;
184

F
Fam Zheng 已提交
185
    while (true) {
M
Mark McLoughlin 已提交
186 187 188 189 190 191 192
        uint8_t *buf = s->buf;

        size = tap_read_packet(s->fd, s->buf, sizeof(s->buf));
        if (size <= 0) {
            break;
        }

193 194 195
        if (s->host_vnet_hdr_len && !s->using_vnet_hdr) {
            buf  += s->host_vnet_hdr_len;
            size -= s->host_vnet_hdr_len;
M
Mark McLoughlin 已提交
196 197
        }

198
        size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed);
M
Mark McLoughlin 已提交
199
        if (size == 0) {
200
            tap_read_poll(s, false);
S
Stefan Hajnoczi 已提交
201 202 203
            break;
        } else if (size < 0) {
            break;
M
Mark McLoughlin 已提交
204
        }
205 206 207 208 209 210 211 212 213 214 215

        /*
         * When the host keeps receiving more packets while tap_send() is
         * running we can hog the QEMU global mutex.  Limit the number of
         * packets that are processed per tap_send() callback to prevent
         * stalling the guest.
         */
        packets++;
        if (packets >= 50) {
            break;
        }
S
Stefan Hajnoczi 已提交
216
    }
217 218
}

219
/* .has_ufo callback: report the has_ufo flag cached in net_tap_fd_init(). */
static bool tap_has_ufo(NetClientState *nc)
{
    TAPState *tap;

    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);

    tap = DO_UPCAST(TAPState, nc, nc);
    return tap->has_ufo;
}

228
/* .has_vnet_hdr callback: true when host_vnet_hdr_len is non-zero. */
static bool tap_has_vnet_hdr(NetClientState *nc)
{
    TAPState *tap;

    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);

    tap = DO_UPCAST(TAPState, nc, nc);
    return tap->host_vnet_hdr_len != 0;
}

237
/*
 * .has_vnet_hdr_len callback: true when tap_probe_vnet_hdr_len() returns
 * non-zero for this fd and the given header length.
 */
static bool tap_has_vnet_hdr_len(NetClientState *nc, int len)
{
    TAPState *tap;

    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);

    tap = DO_UPCAST(TAPState, nc, nc);
    return tap_probe_vnet_hdr_len(tap->fd, len) != 0;
}

246
/*
 * .set_vnet_hdr_len callback: set the vnet header length on the tap fd
 * and remember it in host_vnet_hdr_len.  Only the sizes of
 * struct virtio_net_hdr and struct virtio_net_hdr_mrg_rxbuf are accepted
 * (asserted).
 */
static void tap_set_vnet_hdr_len(NetClientState *nc, int len)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);

    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
    assert(len == sizeof(struct virtio_net_hdr) ||
           len == sizeof(struct virtio_net_hdr_mrg_rxbuf));

    tap_fd_set_vnet_hdr_len(tap->fd, len);
    tap->host_vnet_hdr_len = len;
}

258
/*
 * .using_vnet_hdr callback: record the using_vnet_hdr flag.  The new
 * value must agree with whether host_vnet_hdr_len is non-zero
 * (asserted).
 */
static void tap_using_vnet_hdr(NetClientState *nc, bool using_vnet_hdr)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);

    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);
    assert((tap->host_vnet_hdr_len != 0) == using_vnet_hdr);

    tap->using_vnet_hdr = using_vnet_hdr;
}

G
Greg Kurz 已提交
268 269 270 271 272 273 274 275 276 277 278 279 280 281
/* .set_vnet_le callback: forward to tap_fd_set_vnet_le() on this fd. */
static int tap_set_vnet_le(NetClientState *nc, bool is_le)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);
    int tapfd = tap->fd;

    return tap_fd_set_vnet_le(tapfd, is_le);
}

/* .set_vnet_be callback: forward to tap_fd_set_vnet_be() on this fd. */
static int tap_set_vnet_be(NetClientState *nc, bool is_be)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);
    int tapfd = tap->fd;

    return tap_fd_set_vnet_be(tapfd, is_be);
}

282
/*
 * .set_offload callback: pass the offload flags to
 * tap_fd_set_offload().  Does nothing when the fd is negative (as it is
 * after tap_cleanup()).
 */
static void tap_set_offload(NetClientState *nc, int csum, int tso4,
                     int tso6, int ecn, int ufo)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);

    if (tap->fd >= 0) {
        tap_fd_set_offload(tap->fd, csum, tso4, tso6, ecn, ufo);
    }
}

293
/*
 * .cleanup callback.  In order: tear down vhost-net if present, purge
 * queued packets, run the down script (if one was recorded) and report
 * any error it produced, remove both fd handlers, close the fd and mark
 * it invalid with -1.
 */
static void tap_cleanup(NetClientState *nc)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);

    if (tap->vhost_net) {
        vhost_net_cleanup(tap->vhost_net);
        tap->vhost_net = NULL;
    }

    qemu_purge_queued_packets(nc);

    if (tap->down_script[0] != '\0') {
        Error *local_err = NULL;

        launch_script(tap->down_script, tap->down_script_arg, tap->fd,
                      &local_err);
        if (local_err) {
            error_report_err(local_err);
        }
    }

    tap_read_poll(tap, false);
    tap_write_poll(tap, false);

    close(tap->fd);
    tap->fd = -1;
}

318
/* .poll callback: switch read and write polling on or off together. */
static void tap_poll(NetClientState *nc, bool enable)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);

    tap_read_poll(tap, enable);
    tap_write_poll(tap, enable);
}

325
/* Return the tap file descriptor of a tap net client (type is asserted). */
int tap_get_fd(NetClientState *nc)
{
    TAPState *tap;

    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);

    tap = DO_UPCAST(TAPState, nc, nc);
    return tap->fd;
}

332 333
/* fd support */

334
static NetClientInfo net_tap_info = {
335
    .type = NET_CLIENT_OPTIONS_KIND_TAP,
336 337 338 339
    .size = sizeof(TAPState),
    .receive = tap_receive,
    .receive_raw = tap_receive_raw,
    .receive_iov = tap_receive_iov,
340
    .poll = tap_poll,
341
    .cleanup = tap_cleanup,
342 343 344 345 346 347
    .has_ufo = tap_has_ufo,
    .has_vnet_hdr = tap_has_vnet_hdr,
    .has_vnet_hdr_len = tap_has_vnet_hdr_len,
    .using_vnet_hdr = tap_using_vnet_hdr,
    .set_offload = tap_set_offload,
    .set_vnet_hdr_len = tap_set_vnet_hdr_len,
G
Greg Kurz 已提交
348 349
    .set_vnet_le = tap_set_vnet_le,
    .set_vnet_be = tap_set_vnet_be,
350 351
};

352
/*
 * Create a tap net client around an already-open tap fd.
 *
 * vnet_hdr selects the initial host_vnet_hdr_len: the size of
 * struct virtio_net_hdr when non-zero, 0 otherwise.  Offloads are
 * cleared, read polling is switched on, and the new TAPState is
 * returned.
 */
static TAPState *net_tap_fd_init(NetClientState *peer,
                                 const char *model,
                                 const char *name,
                                 int fd,
                                 int vnet_hdr)
{
    NetClientState *nc = qemu_new_net_client(&net_tap_info, peer, model, name);
    TAPState *s = DO_UPCAST(TAPState, nc, nc);

    s->fd = fd;
    s->vhost_net = NULL;
    s->using_vnet_hdr = false;
    s->enabled = true;
    if (vnet_hdr) {
        s->host_vnet_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        s->host_vnet_hdr_len = 0;
    }
    s->has_ufo = tap_probe_has_ufo(s->fd);

    tap_set_offload(&s->nc, 0, 0, 0, 0, 0);

    /*
     * Make sure host header length is set correctly in tap:
     * it might have been modified by another instance of qemu.
     */
    if (tap_probe_vnet_hdr_len(s->fd, s->host_vnet_hdr_len)) {
        tap_fd_set_vnet_hdr_len(s->fd, s->host_vnet_hdr_len);
    }

    tap_read_poll(s, true);

    return s;
}

383 384
/*
 * Fork and exec a network script as "setup_script ifname", then wait
 * for it.
 *
 * In the child every descriptor from 3 up to sysconf(_SC_OPEN_MAX) is
 * closed except fd.  An error is stored in errp when fork() fails, when
 * the child cannot be waited for, or when the script does not exit with
 * status 0.
 */
static void launch_script(const char *setup_script, const char *ifname,
                          int fd, Error **errp)
{
    int pid, status;
    char *args[3];
    char **parg;

    /* try to launch network script */
    pid = fork();
    if (pid < 0) {
        error_setg_errno(errp, errno, "could not launch network script %s",
                         setup_script);
        return;
    }
    if (pid == 0) {
        int open_max = sysconf(_SC_OPEN_MAX), i;

        for (i = 3; i < open_max; i++) {
            if (i != fd) {
                close(i);
            }
        }
        parg = args;
        *parg++ = (char *)setup_script;
        *parg++ = (char *)ifname;
        *parg = NULL;
        execv(setup_script, args);
        /* only reached when execv() failed */
        _exit(1);
    } else {
        int waited;

        /*
         * Retry only when interrupted by a signal.  Any other failure
         * (for instance ECHILD) will not go away, so looping on it would
         * never terminate.
         */
        do {
            waited = waitpid(pid, &status, 0);
        } while (waited == -1 && errno == EINTR);

        if (waited != pid) {
            error_setg_errno(errp, errno,
                             "could not wait for network script %s",
                             setup_script);
            return;
        }

        if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
            return;
        }
        /* note: this prints the raw wait status, not the decoded exit code */
        error_setg(errp, "network script %s failed with status %d",
                   setup_script, status);
    }
}

C
Corey Bryant 已提交
424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457
/*
 * Receive one file descriptor sent as SCM_RIGHTS ancillary data over
 * the UNIX socket c, together with one byte of ordinary data.
 *
 * Returns the received descriptor, or -1 with errno set:
 *   - recvmsg() failed: errno is left as recvmsg() set it (so a caller
 *     may retry on EINTR);
 *   - the peer closed the socket without sending anything: ECONNRESET;
 *   - data arrived without a usable SCM_RIGHTS message: EINVAL.
 *
 * 0 is never returned to signal end of file, because 0 is a valid
 * descriptor number.
 */
static int recv_fd(int c)
{
    int fd;
    /* union gives the control buffer the alignment struct cmsghdr needs */
    union {
        struct cmsghdr align;
        uint8_t buf[CMSG_SPACE(sizeof(int))];
    } control;
    uint8_t req[1];
    struct iovec iov = {
        .iov_base = req,
        .iov_len = sizeof(req),
    };
    struct msghdr msg = {
        .msg_iov = &iov,
        .msg_iovlen = 1,
        .msg_control = control.buf,
        .msg_controllen = sizeof(control.buf),
    };
    struct cmsghdr *cmsg;
    ssize_t len;

    len = recvmsg(c, &msg, 0);
    if (len < 0) {
        return -1;
    }
    if (len == 0) {
        /* orderly shutdown by the peer: nothing was passed */
        errno = ECONNRESET;
        return -1;
    }

    /* msg_controllen now holds what was actually received */
    cmsg = CMSG_FIRSTHDR(&msg);
    if (cmsg == NULL ||
        cmsg->cmsg_level != SOL_SOCKET ||
        cmsg->cmsg_type != SCM_RIGHTS ||
        cmsg->cmsg_len < CMSG_LEN(sizeof(fd))) {
        errno = EINVAL;
        return -1;
    }

    memcpy(&fd, CMSG_DATA(cmsg), sizeof(fd));
    return fd;
}

458 459
/*
 * Run the bridge helper and return the file descriptor it passes back
 * over a UNIX socket pair.
 *
 * If helper contains a space or tab it is treated as a shell command
 * and run through "/bin/sh -c" with "--use-vnet", "--fd=N" and (unless
 * the command already contains "--br=") "--br=<bridge>" appended.
 * Otherwise it is executed directly with those three arguments.
 *
 * SIGCHLD is blocked for the duration of the call and the previous
 * signal mask is restored on every return path.
 *
 * Returns the received descriptor, or -1 with an error stored in errp.
 */
static int net_bridge_run_helper(const char *helper, const char *bridge,
                                 Error **errp)
{
    sigset_t oldmask, mask;
    int pid, status;
    char *args[5];
    char **parg;
    int sv[2];

    sigemptyset(&mask);
    sigaddset(&mask, SIGCHLD);
    sigprocmask(SIG_BLOCK, &mask, &oldmask);

    if (socketpair(PF_UNIX, SOCK_STREAM, 0, sv) == -1) {
        error_setg_errno(errp, errno, "socketpair() failed");
        /* do not leave SIGCHLD blocked for the caller */
        sigprocmask(SIG_SETMASK, &oldmask, NULL);
        return -1;
    }

    /* try to launch bridge helper */
    pid = fork();
    if (pid < 0) {
        error_setg_errno(errp, errno, "Can't fork bridge helper");
        /* nothing will ever use the socket pair: release it */
        close(sv[0]);
        close(sv[1]);
        sigprocmask(SIG_SETMASK, &oldmask, NULL);
        return -1;
    }
    if (pid == 0) {
        int open_max = sysconf(_SC_OPEN_MAX), i;
        char fd_buf[6+10];
        char br_buf[6+IFNAMSIZ] = {0};
        char helper_cmd[PATH_MAX + sizeof(fd_buf) + sizeof(br_buf) + 15];

        /* keep stdin/stdout/stderr and the helper's end of the socket pair */
        for (i = 3; i < open_max; i++) {
            if (i != sv[1]) {
                close(i);
            }
        }

        snprintf(fd_buf, sizeof(fd_buf), "%s%d", "--fd=", sv[1]);

        if (strrchr(helper, ' ') || strrchr(helper, '\t')) {
            /* assume helper is a command */

            if (strstr(helper, "--br=") == NULL) {
                snprintf(br_buf, sizeof(br_buf), "%s%s", "--br=", bridge);
            }

            snprintf(helper_cmd, sizeof(helper_cmd), "%s %s %s %s",
                     helper, "--use-vnet", fd_buf, br_buf);

            parg = args;
            *parg++ = (char *)"sh";
            *parg++ = (char *)"-c";
            *parg++ = helper_cmd;
            *parg++ = NULL;

            execv("/bin/sh", args);
        } else {
            /* assume helper is just the executable path name */

            snprintf(br_buf, sizeof(br_buf), "%s%s", "--br=", bridge);

            parg = args;
            *parg++ = (char *)helper;
            *parg++ = (char *)"--use-vnet";
            *parg++ = fd_buf;
            *parg++ = br_buf;
            *parg++ = NULL;

            execv(helper, args);
        }
        /* only reached when execv() failed */
        _exit(1);

    } else {
        int fd;
        int saved_errno;

        close(sv[1]);

        do {
            fd = recv_fd(sv[0]);
        } while (fd == -1 && errno == EINTR);
        /* close() and waitpid() below may overwrite errno */
        saved_errno = errno;

        close(sv[0]);

        while (waitpid(pid, &status, 0) != pid) {
            /* loop */
        }
        sigprocmask(SIG_SETMASK, &oldmask, NULL);
        if (fd < 0) {
            error_setg_errno(errp, saved_errno,
                             "failed to recv file descriptor");
            return -1;
        }
        if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
            error_setg(errp, "bridge helper failed");
            return -1;
        }
        return fd;
    }
}

559
int net_init_bridge(const NetClientOptions *opts, const char *name,
560
                    NetClientState *peer, Error **errp)
C
Corey Bryant 已提交
561
{
562 563
    const NetdevBridgeOptions *bridge;
    const char *helper, *br;
C
Corey Bryant 已提交
564 565 566
    TAPState *s;
    int fd, vnet_hdr;

E
Eric Blake 已提交
567 568
    assert(opts->type == NET_CLIENT_OPTIONS_KIND_BRIDGE);
    bridge = opts->u.bridge;
569 570 571

    helper = bridge->has_helper ? bridge->helper : DEFAULT_BRIDGE_HELPER;
    br     = bridge->has_br     ? bridge->br     : DEFAULT_BRIDGE_INTERFACE;
C
Corey Bryant 已提交
572

573
    fd = net_bridge_run_helper(helper, br, errp);
C
Corey Bryant 已提交
574 575 576 577 578 579
    if (fd == -1) {
        return -1;
    }

    fcntl(fd, F_SETFL, O_NONBLOCK);
    vnet_hdr = tap_probe_vnet_hdr(fd);
580
    s = net_tap_fd_init(peer, "bridge", name, fd, vnet_hdr);
C
Corey Bryant 已提交
581

582 583
    snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s,br=%s", helper,
             br);
C
Corey Bryant 已提交
584 585 586 587

    return 0;
}

588 589
/*
 * Open a tap device and, unless setup_script is NULL, empty or "no",
 * run the setup script for it.
 *
 * *vnet_hdr is set from tap->vnet_hdr when the option was given (and is
 * then passed to tap_open() as required); otherwise it is set to 1 and
 * not required.
 *
 * Returns the tap descriptor, or -1 on failure.  When the script fails
 * its error is propagated to errp and the descriptor is closed.
 */
static int net_tap_init(const NetdevTapOptions *tap, int *vnet_hdr,
                        const char *setup_script, char *ifname,
                        size_t ifname_sz, int mq_required, Error **errp)
{
    Error *err = NULL;
    int fd, vnet_hdr_required;
    bool run_script;

    *vnet_hdr = tap->has_vnet_hdr ? tap->vnet_hdr : 1;
    vnet_hdr_required = tap->has_vnet_hdr ? *vnet_hdr : 0;

    TFR(fd = tap_open(ifname, ifname_sz, vnet_hdr, vnet_hdr_required,
                      mq_required, errp));
    if (fd < 0) {
        return -1;
    }

    run_script = setup_script &&
                 setup_script[0] != '\0' &&
                 strcmp(setup_script, "no") != 0;
    if (!run_script) {
        return fd;
    }

    launch_script(setup_script, ifname, fd, &err);
    if (err) {
        error_propagate(errp, err);
        close(fd);
        return -1;
    }

    return fd;
}

J
Jason Wang 已提交
623 624
#define MAX_TAP_QUEUES 1024

625 626 627 628 629
/*
 * Set up one tap queue on an already-open fd: create the net client,
 * apply the sndbuf setting, fill in info_str (and, for the
 * ifname/script case, the down script), and initialise vhost-net when
 * it is requested.
 *
 * vhost is used when tap->vhost says so; if that option is absent it is
 * used when a vhost fd name was given or vhostforce is set.
 *
 * Errors are reported through errp.  The net client created at the top
 * is not removed again on the error paths.
 */
static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
                             const char *model, const char *name,
                             const char *ifname, const char *script,
                             const char *downscript, const char *vhostfdname,
                             int vnet_hdr, int fd, Error **errp)
{
    Error *err = NULL;
    TAPState *s = net_tap_fd_init(peer, model, name, fd, vnet_hdr);
    VhostNetOptions options;
    bool vhostfd_given = tap->has_vhostfd || tap->has_vhostfds;
    bool want_vhost;
    int vhostfd;

    tap_set_sndbuf(s->fd, tap, &err);
    if (err) {
        error_propagate(errp, err);
        return;
    }

    if (tap->has_fd || tap->has_fds) {
        snprintf(s->nc.info_str, sizeof(s->nc.info_str), "fd=%d", fd);
    } else if (tap->has_helper) {
        snprintf(s->nc.info_str, sizeof(s->nc.info_str), "helper=%s",
                 tap->helper);
    } else {
        snprintf(s->nc.info_str, sizeof(s->nc.info_str),
                 "ifname=%s,script=%s,downscript=%s", ifname, script,
                 downscript);

        /* remember what tap_cleanup() has to run */
        if (strcmp(downscript, "no") != 0) {
            snprintf(s->down_script, sizeof(s->down_script), "%s", downscript);
            snprintf(s->down_script_arg, sizeof(s->down_script_arg),
                     "%s", ifname);
        }
    }

    if (tap->has_vhost) {
        want_vhost = tap->vhost;
    } else {
        want_vhost = vhostfdname || (tap->has_vhostforce && tap->vhostforce);
    }

    if (!want_vhost) {
        if (vhostfd_given) {
            error_setg(errp, "vhostfd= is not valid without vhost");
        }
        return;
    }

    options.backend_type = VHOST_BACKEND_TYPE_KERNEL;
    options.net_backend = &s->nc;

    if (vhostfd_given) {
        vhostfd = monitor_fd_param(cur_mon, vhostfdname, &err);
        if (vhostfd == -1) {
            error_propagate(errp, err);
            return;
        }
    } else {
        vhostfd = open("/dev/vhost-net", O_RDWR);
        if (vhostfd < 0) {
            error_setg_errno(errp, errno,
                             "tap: open vhost char device failed");
            return;
        }
    }
    options.opaque = (void *)(uintptr_t)vhostfd;

    s->vhost_net = vhost_net_init(&options);
    if (!s->vhost_net) {
        error_setg(errp,
                   "vhost-net requested but could not be initialized");
        return;
    }
}

J
Jason Wang 已提交
692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717
/*
 * Split a ':'-separated list into newly allocated strings.
 *
 * At most max pieces are stored in fds[]; each one is allocated with
 * g_strdup()/g_strndup().  Returns the number of pieces stored (0 for
 * an empty input string).
 */
static int get_fds(char *str, char *fds[], int max)
{
    const char *end = str + strlen(str);
    char *cur = str;
    int count = 0;

    while (count < max && cur < end) {
        char *sep = strchr(cur, ':');

        if (sep == NULL) {
            /* last piece: take the rest of the string */
            fds[count++] = g_strdup(cur);
            break;
        }

        fds[count++] = g_strndup(cur, sep - cur);
        cur = sep + 1;
    }

    return count;
}

718
int net_init_tap(const NetClientOptions *opts, const char *name,
719
                 NetClientState *peer, Error **errp)
720
{
721
    const NetdevTapOptions *tap;
J
Jason Wang 已提交
722
    int fd, vnet_hdr = 0, i = 0, queues;
723 724
    /* for the no-fd, no-helper case */
    const char *script = NULL; /* suppress wrong "uninit'd use" gcc warning */
725
    const char *downscript = NULL;
726
    Error *err = NULL;
J
Jason Wang 已提交
727
    const char *vhostfdname;
728 729
    char ifname[128];

E
Eric Blake 已提交
730 731
    assert(opts->type == NET_CLIENT_OPTIONS_KIND_TAP);
    tap = opts->u.tap;
J
Jason Wang 已提交
732 733
    queues = tap->has_queues ? tap->queues : 1;
    vhostfdname = tap->has_vhostfd ? tap->vhostfd : NULL;
734

735 736 737
    /* QEMU vlans does not support multiqueue tap, in this case peer is set.
     * For -netdev, peer is always NULL. */
    if (peer && (tap->has_queues || tap->has_fds || tap->has_vhostfds)) {
738
        error_setg(errp, "Multiqueue tap cannot be used with QEMU vlans");
739 740 741
        return -1;
    }

742 743
    if (tap->has_fd) {
        if (tap->has_ifname || tap->has_script || tap->has_downscript ||
J
Jason Wang 已提交
744
            tap->has_vnet_hdr || tap->has_helper || tap->has_queues ||
745
            tap->has_fds || tap->has_vhostfds) {
746 747 748
            error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, "
                       "helper=, queues=, fds=, and vhostfds= "
                       "are invalid with fd=");
749 750 751
            return -1;
        }

752
        fd = monitor_fd_param(cur_mon, tap->fd, &err);
753
        if (fd == -1) {
754
            error_propagate(errp, err);
755 756 757 758 759 760
            return -1;
        }

        fcntl(fd, F_SETFL, O_NONBLOCK);

        vnet_hdr = tap_probe_vnet_hdr(fd);
C
Corey Bryant 已提交
761

762 763 764 765
        net_init_tap_one(tap, peer, "tap", name, NULL,
                         script, downscript,
                         vhostfdname, vnet_hdr, fd, &err);
        if (err) {
766
            error_propagate(errp, err);
J
Jason Wang 已提交
767 768 769 770 771 772 773 774 775
            return -1;
        }
    } else if (tap->has_fds) {
        char *fds[MAX_TAP_QUEUES];
        char *vhost_fds[MAX_TAP_QUEUES];
        int nfds, nvhosts;

        if (tap->has_ifname || tap->has_script || tap->has_downscript ||
            tap->has_vnet_hdr || tap->has_helper || tap->has_queues ||
776
            tap->has_vhostfd) {
777 778 779
            error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, "
                       "helper=, queues=, and vhostfd= "
                       "are invalid with fds=");
J
Jason Wang 已提交
780 781 782 783 784 785 786
            return -1;
        }

        nfds = get_fds(tap->fds, fds, MAX_TAP_QUEUES);
        if (tap->has_vhostfds) {
            nvhosts = get_fds(tap->vhostfds, vhost_fds, MAX_TAP_QUEUES);
            if (nfds != nvhosts) {
787 788
                error_setg(errp, "The number of fds passed does not match "
                           "the number of vhostfds passed");
J
Jason Wang 已提交
789 790 791 792 793
                return -1;
            }
        }

        for (i = 0; i < nfds; i++) {
794
            fd = monitor_fd_param(cur_mon, fds[i], &err);
J
Jason Wang 已提交
795
            if (fd == -1) {
796
                error_propagate(errp, err);
J
Jason Wang 已提交
797 798 799 800
                return -1;
            }

            fcntl(fd, F_SETFL, O_NONBLOCK);
C
Corey Bryant 已提交
801

J
Jason Wang 已提交
802 803 804
            if (i == 0) {
                vnet_hdr = tap_probe_vnet_hdr(fd);
            } else if (vnet_hdr != tap_probe_vnet_hdr(fd)) {
805 806
                error_setg(errp,
                           "vnet_hdr not consistent across given tap fds");
J
Jason Wang 已提交
807 808 809
                return -1;
            }

810 811 812 813 814
            net_init_tap_one(tap, peer, "tap", name, ifname,
                             script, downscript,
                             tap->has_vhostfds ? vhost_fds[i] : NULL,
                             vnet_hdr, fd, &err);
            if (err) {
815
                error_propagate(errp, err);
J
Jason Wang 已提交
816 817 818
                return -1;
            }
        }
819 820
    } else if (tap->has_helper) {
        if (tap->has_ifname || tap->has_script || tap->has_downscript ||
821
            tap->has_vnet_hdr || tap->has_queues || tap->has_vhostfds) {
822 823
            error_setg(errp, "ifname=, script=, downscript=, vnet_hdr=, "
                       "queues=, and vhostfds= are invalid with helper=");
C
Corey Bryant 已提交
824 825 826
            return -1;
        }

827 828
        fd = net_bridge_run_helper(tap->helper, DEFAULT_BRIDGE_INTERFACE,
                                   errp);
C
Corey Bryant 已提交
829 830 831 832 833 834 835
        if (fd == -1) {
            return -1;
        }

        fcntl(fd, F_SETFL, O_NONBLOCK);
        vnet_hdr = tap_probe_vnet_hdr(fd);

836 837 838 839
        net_init_tap_one(tap, peer, "bridge", name, ifname,
                         script, downscript, vhostfdname,
                         vnet_hdr, fd, &err);
        if (err) {
840
            error_propagate(errp, err);
841
            close(fd);
J
Jason Wang 已提交
842 843
            return -1;
        }
844
    } else {
845
        if (tap->has_vhostfds) {
846
            error_setg(errp, "vhostfds= is invalid if fds= wasn't specified");
847 848
            return -1;
        }
849
        script = tap->has_script ? tap->script : DEFAULT_NETWORK_SCRIPT;
850 851
        downscript = tap->has_downscript ? tap->downscript :
            DEFAULT_NETWORK_DOWN_SCRIPT;
J
Jason Wang 已提交
852 853 854 855 856

        if (tap->has_ifname) {
            pstrcpy(ifname, sizeof ifname, tap->ifname);
        } else {
            ifname[0] = '\0';
857
        }
C
Corey Bryant 已提交
858

J
Jason Wang 已提交
859 860
        for (i = 0; i < queues; i++) {
            fd = net_tap_init(tap, &vnet_hdr, i >= 1 ? "no" : script,
861
                              ifname, sizeof ifname, queues > 1, errp);
J
Jason Wang 已提交
862 863 864 865 866 867
            if (fd == -1) {
                return -1;
            }

            if (queues > 1 && i == 0 && !tap->has_ifname) {
                if (tap_fd_get_ifname(fd, ifname)) {
868
                    error_setg(errp, "Fail to get ifname");
869
                    close(fd);
J
Jason Wang 已提交
870 871 872 873
                    return -1;
                }
            }

874 875 876 877 878
            net_init_tap_one(tap, peer, "tap", name, ifname,
                             i >= 1 ? "no" : script,
                             i >= 1 ? "no" : downscript,
                             vhostfdname, vnet_hdr, fd, &err);
            if (err) {
879
                error_propagate(errp, err);
880
                close(fd);
J
Jason Wang 已提交
881 882 883
                return -1;
            }
        }
884 885
    }

J
Jason Wang 已提交
886
    return 0;
887
}
888

889
/*
 * Return the vhost-net instance attached to a tap net client, or NULL
 * if it has none (type is asserted).
 */
VHostNetState *tap_get_vhost_net(NetClientState *nc)
{
    TAPState *tap;

    assert(nc->info->type == NET_CLIENT_OPTIONS_KIND_TAP);

    tap = DO_UPCAST(TAPState, nc, nc);
    return tap->vhost_net;
}
895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929

/*
 * Enable a tap queue.  Returns 0 if it already was enabled; otherwise
 * returns tap_fd_enable()'s result and, when that is 0, marks the queue
 * enabled and re-installs the fd handlers.
 */
int tap_enable(NetClientState *nc)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);
    int ret;

    if (tap->enabled) {
        return 0;
    }

    ret = tap_fd_enable(tap->fd);
    if (ret != 0) {
        return ret;
    }

    tap->enabled = true;
    tap_update_fd_handler(tap);
    return ret;
}

/*
 * Disable a tap queue.  Returns 0 if it already was disabled; otherwise
 * returns tap_fd_disable()'s result and, when that is 0, purges queued
 * packets, marks the queue disabled and removes the fd handlers.
 */
int tap_disable(NetClientState *nc)
{
    TAPState *tap = DO_UPCAST(TAPState, nc, nc);
    int ret;

    if (!tap->enabled) {
        return 0;
    }

    ret = tap_fd_disable(tap->fd);
    if (ret != 0) {
        return ret;
    }

    qemu_purge_queued_packets(nc);
    tap->enabled = false;
    tap_update_fd_handler(tap);
    return ret;
}