virtio-net.c 31.7 KB
Newer Older
A
aliguori 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

14
#include "iov.h"
A
aliguori 已提交
15 16
#include "virtio.h"
#include "net.h"
17
#include "net/checksum.h"
18
#include "net/tap.h"
19
#include "qemu-error.h"
A
aliguori 已提交
20 21
#include "qemu-timer.h"
#include "virtio-net.h"
22
#include "vhost_net.h"
A
aliguori 已提交
23

24
#define VIRTIO_NET_VM_VERSION    11
25

26
#define MAC_TABLE_ENTRIES    64
27
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
28

A
aliguori 已提交
29 30 31
typedef struct VirtIONet
{
    VirtIODevice vdev;
32
    uint8_t mac[ETH_ALEN];
33
    uint16_t status;
A
aliguori 已提交
34 35
    VirtQueue *rx_vq;
    VirtQueue *tx_vq;
36
    VirtQueue *ctrl_vq;
M
Mark McLoughlin 已提交
37
    NICState *nic;
A
aliguori 已提交
38
    QEMUTimer *tx_timer;
39
    QEMUBH *tx_bh;
40
    uint32_t tx_timeout;
41
    int32_t tx_burst;
42
    int tx_waiting;
M
Mark McLoughlin 已提交
43
    uint32_t has_vnet_hdr;
44 45
    size_t host_hdr_len;
    size_t guest_hdr_len;
46
    uint8_t has_ufo;
47 48 49 50
    struct {
        VirtQueueElement elem;
        ssize_t len;
    } async_tx;
A
aliguori 已提交
51
    int mergeable_rx_bufs;
52 53
    uint8_t promisc;
    uint8_t allmulti;
54 55 56 57
    uint8_t alluni;
    uint8_t nomulti;
    uint8_t nouni;
    uint8_t nobcast;
58
    uint8_t vhost_started;
59 60
    struct {
        int in_use;
61
        int first_multi;
62 63
        uint8_t multi_overflow;
        uint8_t uni_overflow;
64 65
        uint8_t *macs;
    } mac_table;
66
    uint32_t *vlans;
67
    DeviceState *qdev;
A
aliguori 已提交
68 69 70 71 72 73 74 75 76 77 78
} VirtIONet;

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

/* Convert a generic VirtIODevice pointer into the VirtIONet it is cast from. */
static VirtIONet *to_virtio_net(VirtIODevice *vdev)
{
    VirtIONet *net = (VirtIONet *)vdev;

    return net;
}

79
/*
 * Fill @config with the device configuration: the current MAC address and
 * the link status word (stored with stw_p()).
 */
static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *net = to_virtio_net(vdev);
    struct virtio_net_config cfg;

    memcpy(cfg.mac, net->mac, ETH_ALEN);
    stw_p(&cfg.status, net->status);

    memcpy(config, &cfg, sizeof(cfg));
}

89 90 91 92 93 94 95
/*
 * Apply a configuration written by the guest.  Only the MAC address is
 * taken over; when it changed, the NIC info string is regenerated.
 */
static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *net = to_virtio_net(vdev);
    struct virtio_net_config cfg;

    memcpy(&cfg, config, sizeof(cfg));

    if (memcmp(cfg.mac, net->mac, ETH_ALEN) == 0) {
        /* MAC unchanged, nothing to update. */
        return;
    }

    memcpy(net->mac, cfg.mac, ETH_ALEN);
    qemu_format_nic_info_str(&net->nic->nc, net->mac);
}

102 103 104
/*
 * Tell whether the device counts as running for the given device @status:
 * the driver has set DRIVER_OK, the link is up and the VM is running.
 */
static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return false;
    }
    if (!(n->status & VIRTIO_NET_S_LINK_UP)) {
        return false;
    }
    return n->vdev.vm_running;
}

/*
 * Start or stop the vhost backend so that it follows the device state
 * implied by @status.  Does nothing unless the peer is a tap client that
 * has a vhost net instance attached.
 */
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    bool should_run;

    if (!n->nic->nc.peer ||
        n->nic->nc.peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP ||
        !tap_get_vhost_net(n->nic->nc.peer)) {
        return;
    }

    /* Already in the wanted state and the peer link is up: no transition. */
    should_run = virtio_net_started(n, status);
    if (!!n->vhost_started == should_run && !n->nic->nc.peer->link_down) {
        return;
    }

    if (n->vhost_started) {
        vhost_net_stop(tap_get_vhost_net(n->nic->nc.peer), &n->vdev);
        n->vhost_started = 0;
    } else {
        int r;

        if (!vhost_net_query(tap_get_vhost_net(n->nic->nc.peer), &n->vdev)) {
            return;
        }
        r = vhost_net_start(tap_get_vhost_net(n->nic->nc.peer), &n->vdev);
        if (r >= 0) {
            n->vhost_started = 1;
        } else {
            /* vhost_started stays 0, so the userspace path keeps serving. */
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
        }
    }
}

142 143 144 145 146 147 148 149 150 151 152 153 154
static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = to_virtio_net(vdev);

    virtio_net_vhost_status(n, status);

    if (!n->tx_waiting) {
        return;
    }

    if (virtio_net_started(n, status) && !n->vhost_started) {
        if (n->tx_timer) {
            qemu_mod_timer(n->tx_timer,
155
                           qemu_get_clock_ns(vm_clock) + n->tx_timeout);
156 157 158 159 160 161 162 163 164 165 166 167
        } else {
            qemu_bh_schedule(n->tx_bh);
        }
    } else {
        if (n->tx_timer) {
            qemu_del_timer(n->tx_timer);
        } else {
            qemu_bh_cancel(n->tx_bh);
        }
    }
}

168
/*
 * Link state change callback.  Mirrors nc->link_down into the LINK_UP
 * status bit, raises a config interrupt when the bit changed and then
 * re-evaluates the device status.
 */
static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
    uint16_t prev = n->status;

    if (!nc->link_down) {
        n->status |= VIRTIO_NET_S_LINK_UP;
    } else {
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    }

    if (prev != n->status) {
        virtio_notify_config(&n->vdev);
    }

    virtio_net_set_status(&n->vdev, n->vdev.status);
}

184 185 186 187 188 189 190
/*
 * Device reset: return to the compatibility receive mode (promiscuous,
 * every other mode flag cleared) and empty the MAC and VLAN filter tables.
 */
static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = to_virtio_net(vdev);

    /* Receive mode flags */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;

    /* MAC filter table */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);

    /* VLAN bitmap: MAX_VLAN bits, hence MAX_VLAN / 8 bytes */
    memset(n->vlans, 0, MAX_VLAN >> 3);
}

M
Mark McLoughlin 已提交
205 206
/*
 * Query whether the peer is a tap client supporting the vnet header and
 * cache the answer in n->has_vnet_hdr.  Returns 0 (leaving the cached value
 * untouched) when there is no peer or the peer is not a tap client.
 */
static int peer_has_vnet_hdr(VirtIONet *n)
{
    if (!n->nic->nc.peer ||
        n->nic->nc.peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
        return 0;
    }

    n->has_vnet_hdr = tap_has_vnet_hdr(n->nic->nc.peer);
    return n->has_vnet_hdr;
}

218 219 220 221 222
/*
 * Query whether the peer supports UFO and cache the answer in n->has_ufo.
 * Returns 0 without touching the cached value when the peer has no vnet
 * header support.
 */
static int peer_has_ufo(VirtIONet *n)
{
    if (peer_has_vnet_hdr(n)) {
        n->has_ufo = tap_has_ufo(n->nic->nc.peer);
        return n->has_ufo;
    }

    return 0;
}

228
/*
 * Compute the feature bits offered to the guest, starting from @features.
 *
 * VIRTIO_NET_F_MAC is always offered.  Checksum/TSO/ECN offloads are
 * removed when the peer lacks vnet header support, UFO is removed when the
 * peer lacks vnet header or UFO support.  If the peer is a tap client with
 * vhost attached, vhost gets the final say on the result.
 */
static uint32_t virtio_net_get_features(VirtIODevice *vdev, uint32_t features)
{
    VirtIONet *n = to_virtio_net(vdev);
    const uint32_t vnet_hdr_offloads =
        (0x1 << VIRTIO_NET_F_CSUM) |
        (0x1 << VIRTIO_NET_F_HOST_TSO4) |
        (0x1 << VIRTIO_NET_F_HOST_TSO6) |
        (0x1 << VIRTIO_NET_F_HOST_ECN) |
        (0x1 << VIRTIO_NET_F_GUEST_CSUM) |
        (0x1 << VIRTIO_NET_F_GUEST_TSO4) |
        (0x1 << VIRTIO_NET_F_GUEST_TSO6) |
        (0x1 << VIRTIO_NET_F_GUEST_ECN);
    const uint32_t ufo_offloads =
        (0x1 << VIRTIO_NET_F_GUEST_UFO) |
        (0x1 << VIRTIO_NET_F_HOST_UFO);

    features |= (1 << VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        features &= ~vnet_hdr_offloads;
    } else {
        tap_using_vnet_hdr(n->nic->nc.peer, 1);
        n->host_hdr_len = sizeof(struct virtio_net_hdr);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        features &= ~ufo_offloads;
    }

    if (n->nic->nc.peer &&
        n->nic->nc.peer->info->type == NET_CLIENT_OPTIONS_KIND_TAP &&
        tap_get_vhost_net(n->nic->nc.peer)) {
        return vhost_net_get_features(tap_get_vhost_net(n->nic->nc.peer),
                                      features);
    }

    return features;
}

264 265 266 267 268 269 270
/*
 * Feature set to assume for the Linux 2.6.25 kernel driver: it understood
 * MAC (as everyone must) plus checksum, TSO4, TSO6 and ECN on the host side.
 */
static uint32_t virtio_net_bad_features(VirtIODevice *vdev)
{
    const uint32_t legacy =
        (1 << VIRTIO_NET_F_MAC) |
        (1 << VIRTIO_NET_F_CSUM) |
        (1 << VIRTIO_NET_F_HOST_TSO4) |
        (1 << VIRTIO_NET_F_HOST_TSO6) |
        (1 << VIRTIO_NET_F_HOST_ECN);

    return legacy;
}

A
aliguori 已提交
279 280 281 282 283
/*
 * Record the features acknowledged by the guest: mergeable RX buffers (and
 * the resulting guest header size), the offloads to enable on the tap peer,
 * and finally hand the feature set to vhost when vhost is attached.
 */
static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features)
{
    VirtIONet *n = to_virtio_net(vdev);

    n->mergeable_rx_bufs = (features >> VIRTIO_NET_F_MRG_RXBUF) & 1;
    if (n->mergeable_rx_bufs) {
        n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
    } else {
        n->guest_hdr_len = sizeof(struct virtio_net_hdr);
    }

    if (n->has_vnet_hdr) {
        tap_set_offload(n->nic->nc.peer,
                        (features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
                        (features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
                        (features >> VIRTIO_NET_F_GUEST_TSO6) & 1,
                        (features >> VIRTIO_NET_F_GUEST_ECN)  & 1,
                        (features >> VIRTIO_NET_F_GUEST_UFO)  & 1);
    }

    if (n->nic->nc.peer &&
        n->nic->nc.peer->info->type == NET_CLIENT_OPTIONS_KIND_TAP &&
        tap_get_vhost_net(n->nic->nc.peer)) {
        vhost_net_ack_features(tap_get_vhost_net(n->nic->nc.peer), features);
    }
}

305 306 307 308 309 310
/*
 * Handle a receive-mode control command.
 *
 * The command payload is a single byte in out_sg[1] holding the new value of
 * the mode flag selected by @cmd.
 *
 * Returns VIRTIO_NET_OK on success.  Returns VIRTIO_NET_ERR for an unknown
 * @cmd or a malformed request; a malformed request is reported to the guest
 * like any other failed command (as the VLAN handler does) instead of
 * terminating the whole process.
 */
static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     VirtQueueElement *elem)
{
    uint8_t on;

    if (elem->out_num != 2 || elem->out_sg[1].iov_len != sizeof(on)) {
        error_report("virtio-net ctrl invalid rx mode command");
        return VIRTIO_NET_ERR;
    }

    on = ldub_p(elem->out_sg[1].iov_base);

    if (cmd == VIRTIO_NET_CTRL_RX_MODE_PROMISC) {
        n->promisc = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_MODE_ALLMULTI) {
        n->allmulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_MODE_ALLUNI) {
        n->alluni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOMULTI) {
        n->nomulti = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOUNI) {
        n->nouni = on;
    } else if (cmd == VIRTIO_NET_CTRL_RX_MODE_NOBCAST) {
        n->nobcast = on;
    } else {
        return VIRTIO_NET_ERR;
    }

    return VIRTIO_NET_OK;
}

335 336 337 338 339 340 341 342 343 344 345
/*
 * Handle a MAC-class control command (only VIRTIO_NET_CTRL_MAC_TABLE_SET is
 * accepted).
 *
 * out_sg[1] carries the unicast list and out_sg[2] the multicast list.  Each
 * list starts with an entry count, read with ldl_p(), followed by that many
 * ETH_ALEN-byte addresses.  The current table is discarded before the new
 * lists are validated, so a request rejected for a bad length leaves the
 * table empty (or holding only the unicast list).
 *
 * A list that does not fit in the MAC_TABLE_ENTRIES slots is not stored;
 * the matching overflow flag is set instead.
 *
 * Returns VIRTIO_NET_OK or VIRTIO_NET_ERR.
 */
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 VirtQueueElement *elem)
{
    struct virtio_net_ctrl_mac mac_data;
    uint64_t needed;

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET || elem->out_num != 3 ||
        elem->out_sg[1].iov_len < sizeof(mac_data) ||
        elem->out_sg[2].iov_len < sizeof(mac_data)) {
        return VIRTIO_NET_ERR;
    }

    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.uni_overflow = 0;
    n->mac_table.multi_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);

    /* Unicast list */
    mac_data.entries = ldl_p(elem->out_sg[1].iov_base);

    /* The count comes from the guest.  Do the size arithmetic in 64 bits so
     * that a huge count cannot wrap around and slip past the length check. */
    needed = sizeof(mac_data.entries) + (uint64_t)mac_data.entries * ETH_ALEN;
    if (needed > elem->out_sg[1].iov_len) {
        return VIRTIO_NET_ERR;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        memcpy(n->mac_table.macs, elem->out_sg[1].iov_base + sizeof(mac_data),
               mac_data.entries * ETH_ALEN);
        n->mac_table.in_use += mac_data.entries;
    } else {
        n->mac_table.uni_overflow = 1;
    }

    /* Multicast entries are stored right after the unicast ones. */
    n->mac_table.first_multi = n->mac_table.in_use;

    /* Multicast list */
    mac_data.entries = ldl_p(elem->out_sg[2].iov_base);

    needed = sizeof(mac_data.entries) + (uint64_t)mac_data.entries * ETH_ALEN;
    if (needed > elem->out_sg[2].iov_len) {
        return VIRTIO_NET_ERR;
    }

    if (mac_data.entries) {
        /* Same wrap-around concern for the "does it still fit" test. */
        if ((uint64_t)n->mac_table.in_use + mac_data.entries <=
            MAC_TABLE_ENTRIES) {
            memcpy(n->mac_table.macs + (n->mac_table.in_use * ETH_ALEN),
                   elem->out_sg[2].iov_base + sizeof(mac_data),
                   mac_data.entries * ETH_ALEN);
            n->mac_table.in_use += mac_data.entries;
        } else {
            n->mac_table.multi_overflow = 1;
        }
    }

    return VIRTIO_NET_OK;
}

387 388 389 390 391 392
/*
 * Handle a VLAN-class control command: add or remove one VLAN id, read as a
 * 16-bit value from out_sg[1], in the VLAN filter bitmap.
 *
 * Returns VIRTIO_NET_OK, or VIRTIO_NET_ERR for a malformed request, an id
 * of MAX_VLAN or above, or an unknown @cmd.
 */
static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        VirtQueueElement *elem)
{
    uint16_t vid;

    if (!(elem->out_num == 2 && elem->out_sg[1].iov_len == sizeof(vid))) {
        error_report("virtio-net ctrl invalid vlan command");
        return VIRTIO_NET_ERR;
    }

    vid = lduw_p(elem->out_sg[1].iov_base);
    if (vid >= MAX_VLAN) {
        return VIRTIO_NET_ERR;
    }

    /* The bitmap is an array of 32-bit words: word index is vid / 32 and
     * the bit inside the word is vid % 32. */
    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) {
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
        return VIRTIO_NET_OK;
    }
    if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) {
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
        return VIRTIO_NET_OK;
    }

    return VIRTIO_NET_ERR;
}

412 413
/*
 * Control virtqueue handler.  Pops every pending request, dispatches it by
 * class and writes a one-byte ack into the last "in" buffer before pushing
 * the element back and notifying the guest.
 *
 * A request lacking an out or in buffer, or whose header/ack buffers are
 * too small, is treated as fatal and terminates the process.
 */
static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = to_virtio_net(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    VirtQueueElement elem;

    while (virtqueue_pop(vq, &elem)) {
        /* Every request starts out as failed, so that a request with an
         * unknown class is not acknowledged with the result of the
         * previously handled one. */
        virtio_net_ctrl_ack status = VIRTIO_NET_ERR;

        if ((elem.in_num < 1) || (elem.out_num < 1)) {
            error_report("virtio-net ctrl missing headers");
            exit(1);
        }

        if (elem.out_sg[0].iov_len < sizeof(ctrl) ||
            elem.in_sg[elem.in_num - 1].iov_len < sizeof(status)) {
            error_report("virtio-net ctrl header not in correct element");
            exit(1);
        }

        ctrl.class = ldub_p(elem.out_sg[0].iov_base);
        ctrl.cmd = ldub_p(elem.out_sg[0].iov_base + sizeof(ctrl.class));

        if (ctrl.class == VIRTIO_NET_CTRL_RX_MODE) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, &elem);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, &elem);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, &elem);
        }

        stb_p(elem.in_sg[elem.in_num - 1].iov_base, status);

        virtqueue_push(vq, &elem, sizeof(status));
        virtio_notify(vdev, vq);
    }
}

A
aliguori 已提交
448 449 450 451
/* RX */

/*
 * RX virtqueue handler: flush the packets that were queued for this NIC.
 */
static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    qemu_flush_queued_packets(&to_virtio_net(vdev)->nic->nc);
}

457
/*
 * Return 1 when the device may accept a packet: the VM is running, the RX
 * queue is ready and the driver has set DRIVER_OK.  Return 0 otherwise.
 */
static int virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;

    if (!n->vdev.vm_running) {
        return 0;
    }

    return virtio_queue_ready(n->rx_vq) &&
           (n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK);
}

/*
 * Return non-zero when the RX queue cannot take a packet of @bufsize bytes:
 * the queue is empty, or mergeable buffers are in use and fewer than
 * @bufsize bytes are available.
 */
static int virtio_net_rx_short_of_buffers(VirtIONet *n, int bufsize)
{
    if (virtio_queue_empty(n->rx_vq)) {
        return 1;
    }
    return n->mergeable_rx_bufs &&
           !virtqueue_avail_bytes(n->rx_vq, bufsize, 0);
}

/*
 * Check whether the guest supplied enough RX buffers for @bufsize bytes.
 *
 * Returns 1 with queue notifications disabled when buffers are available,
 * or 0 with notifications left enabled so that a later refill by the guest
 * is signalled.
 */
static int virtio_net_has_buffers(VirtIONet *n, int bufsize)
{
    if (virtio_net_rx_short_of_buffers(n, bufsize)) {
        virtio_queue_set_notification(n->rx_vq, 1);

        /* The guest may have added buffers between the first check and
         * the moment notifications were enabled, so look once more. */
        if (virtio_net_rx_short_of_buffers(n, bufsize)) {
            return 0;
        }
    }

    virtio_queue_set_notification(n->rx_vq, 0);
    return 1;
}

A
Anthony Liguori 已提交
492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519
/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
/*
 * Compute the checksum in software for packets that look like DHCP server
 * replies (IPv4, UDP, source port 67) still marked as needing a checksum,
 * and clear the NEEDS_CSUM flag in @hdr.
 *
 * @buf/@size describe the frame starting at the Ethernet header.  The fixed
 * offsets used below correspond to a 14-byte Ethernet header followed by a
 * 20-byte IPv4 header (no options); frames too short to hold those headers
 * plus the 8-byte UDP header are left alone, so no byte beyond @size is
 * ever inspected.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        const uint8_t *buf, size_t size)
{
    /* Ethernet (14) + IPv4 without options (20) + UDP (8) header bytes. */
    const size_t min_udp_frame = 14 + 20 + 8;

    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size >= min_udp_frame && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        /* FIXME this cast is evil */
        net_checksum_calculate((uint8_t *)buf, size);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}

A
aliguori 已提交
520
/*
 * Write the virtio-net header for an incoming packet into the first guest
 * buffer and advance that buffer past the @hdr_len bytes of guest header.
 *
 * When the peer supplies a vnet header it is copied from the start of @buf;
 * otherwise the header is left with no flags and no GSO.
 *
 * Returns the number of bytes of @buf consumed as header (0 when the peer
 * supplies none).
 */
static int receive_header(VirtIONet *n, struct iovec *iov, int iovcnt,
                          const void *buf, size_t size, size_t hdr_len)
{
    struct virtio_net_hdr *hdr = (struct virtio_net_hdr *)iov->iov_base;
    int consumed = 0;

    hdr->flags = 0;
    hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;

    if (n->has_vnet_hdr) {
        memcpy(hdr, buf, sizeof(*hdr));
        consumed = sizeof(*hdr);
        work_around_broken_dhclient(hdr, buf + consumed, size - consumed);
    }

    /* The peer only ever hands us a struct virtio_net_hdr, but the header
     * passed to the guest may be larger; skip the guest-sized header. */
    iov->iov_base += hdr_len;
    iov->iov_len  -= hdr_len;

    return consumed;
}

544 545 546
/*
 * Decide whether an incoming frame is delivered to the guest.
 *
 * Returns 1 to accept and 0 to drop.  Everything is accepted in promiscuous
 * mode.  Otherwise a VLAN-tagged frame must have its id set in the VLAN
 * bitmap, and the destination address is checked against the receive mode
 * flags, the device MAC and the MAC filter table.
 */
static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *frame = (uint8_t *)buf;
    int lo, hi, idx;

    if (n->promisc) {
        return 1;
    }

    /* Skip the vnet header the peer put in front of the frame. */
    if (n->has_vnet_hdr) {
        frame += sizeof(struct virtio_net_hdr);
    }

    if (memcmp(&frame[12], vlan, sizeof(vlan)) == 0) {
        int vid = be16_to_cpup((uint16_t *)(frame + 14)) & 0xfff;

        if ((n->vlans[vid >> 5] & (1U << (vid & 0x1f))) == 0) {
            return 0;
        }
    }

    if (frame[0] & 1) {
        /* Group address: broadcast or multicast. */
        if (memcmp(frame, bcast, sizeof(bcast)) == 0) {
            return !n->nobcast;
        }
        if (n->nomulti) {
            return 0;
        }
        if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }
        /* Multicast entries occupy the tail of the table. */
        lo = n->mac_table.first_multi;
        hi = n->mac_table.in_use;
    } else {
        /* Unicast address. */
        if (n->nouni) {
            return 0;
        }
        if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        }
        if (memcmp(frame, n->mac, ETH_ALEN) == 0) {
            return 1;
        }
        /* Unicast entries occupy the head of the table. */
        lo = 0;
        hi = n->mac_table.first_multi;
    }

    for (idx = lo; idx < hi; idx++) {
        if (memcmp(frame, &n->mac_table.macs[idx * ETH_ALEN], ETH_ALEN) == 0) {
            return 1;
        }
    }

    return 0;
}

597
/*
 * Deliver one packet from the peer to the guest RX queue.
 *
 * Returns -1 when the device cannot receive right now (or no RX element
 * could be popped for the first chunk), 0 when the guest has not supplied
 * enough buffers, and @size once the packet has been delivered, filtered
 * out, or dropped because it did not fit a non-mergeable buffer.
 *
 * An RX queue that runs dry in the middle of a packet, or an element with
 * an unexpected layout, is treated as fatal and terminates the process.
 */
static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size)
{
    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
    struct virtio_net_hdr_mrg_rxbuf *mhdr = NULL;
    size_t offset = 0;      /* bytes of buf consumed so far */
    size_t i = 0;           /* RX elements filled so far */

    if (!virtio_net_can_receive(&n->nic->nc)) {
        return -1;
    }

    /* The guest header replaces the host header, hence the adjustment. */
    if (!virtio_net_has_buffers(n, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size)) {
        return size;
    }

    while (offset < size) {
        VirtQueueElement elem;
        struct iovec sg[VIRTQUEUE_MAX_SIZE];
        int len;
        int total = 0;      /* bytes written into this element */

        if (!virtqueue_pop(n->rx_vq, &elem)) {
            if (i == 0) {
                return -1;
            }
            error_report("virtio-net unexpected empty queue: "
                    "i %zd mergeable %d offset %zd, size %zd, "
                    "guest hdr len %zd, host hdr len %zd guest features 0x%x",
                    i, n->mergeable_rx_bufs, offset, size,
                    n->guest_hdr_len, n->host_hdr_len, n->vdev.guest_features);
            exit(1);
        }

        if (elem.in_num < 1) {
            error_report("virtio-net receive queue contains no in buffers");
            exit(1);
        }

        if (!n->mergeable_rx_bufs &&
            elem.in_sg[0].iov_len != n->guest_hdr_len) {
            error_report("virtio-net header not in first element");
            exit(1);
        }

        /* Work on a copy of the scatter list: receive_header() advances
         * the first entry past the header. */
        memcpy(&sg, &elem.in_sg[0], sizeof(sg[0]) * elem.in_num);

        if (i == 0) {
            /* Remember where the header lives so that num_buffers can be
             * filled in once the number of elements used is known. */
            if (n->mergeable_rx_bufs) {
                mhdr = (struct virtio_net_hdr_mrg_rxbuf *)sg[0].iov_base;
            }

            offset += receive_header(n, sg, elem.in_num,
                                     buf + offset, size - offset,
                                     n->guest_hdr_len);
            total += n->guest_hdr_len;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem.in_num, 0,
                           buf + offset, size - offset);
        total += len;
        offset += len;

        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
#if 0
            error_report("virtio-net truncated non-mergeable packet: "
                         "i %zd mergeable %d offset %zd, size %zd, "
                         "guest hdr len %zd, host hdr len %zd",
                         i, n->mergeable_rx_bufs,
                         offset, size, n->guest_hdr_len, n->host_hdr_len);
#endif
            return size;
        }

        /* signal other side */
        virtqueue_fill(n->rx_vq, &elem, total, i);
        i++;
    }

    if (mhdr) {
        stw_p(&mhdr->num_buffers, i);
    }

    virtqueue_flush(n->rx_vq, i);
    virtio_notify(&n->vdev, n->rx_vq);

    return size;
}

688
static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq);
689

690
/*
 * Completion callback for an asynchronously sent packet: return the pending
 * element to the guest, clear the async TX state, re-enable TX queue
 * notifications and continue flushing the TX queue.
 */
static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
{
    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;

    virtqueue_push(n->tx_vq, &n->async_tx.elem, 0);
    virtio_notify(&n->vdev, n->tx_vq);

    /* out_num == 0 marks "no asynchronous send outstanding". */
    n->async_tx.len = 0;
    n->async_tx.elem.out_num = 0;

    virtio_queue_set_notification(n->tx_vq, 1);
    virtio_net_flush_tx(n, n->tx_vq);
}

A
aliguori 已提交
703
/* TX */
704
/*
 * Send packets queued by the guest on @vq, at most n->tx_burst per call.
 *
 * Returns the number of packets sent; 0 when the driver is not ready or an
 * asynchronous send is still outstanding; -EBUSY when the peer could not
 * take a packet right away, in which case the element is parked in
 * n->async_tx and virtio_net_tx_complete() finishes the job.
 *
 * An element whose first buffer is not exactly the guest header is treated
 * as fatal and terminates the process.
 */
static int32_t virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
{
    VirtQueueElement elem;
    int32_t sent = 0;

    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return 0;
    }

    assert(n->vdev.vm_running);

    /* A previous packet is still in flight: keep notifications off until
     * its completion callback runs. */
    if (n->async_tx.elem.out_num) {
        virtio_queue_set_notification(n->tx_vq, 0);
        return 0;
    }

    while (virtqueue_pop(vq, &elem)) {
        ssize_t ret;
        ssize_t len = 0;
        unsigned int out_num = elem.out_num;
        struct iovec *out_sg = &elem.out_sg[0];
        unsigned hdr_len;

        /* hdr_len refers to the header received from the guest */
        if (n->mergeable_rx_bufs) {
            hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
        } else {
            hdr_len = sizeof(struct virtio_net_hdr);
        }

        if (out_num < 1 || out_sg->iov_len != hdr_len) {
            error_report("virtio-net header not in first element");
            exit(1);
        }

        if (!n->has_vnet_hdr) {
            /* The peer takes no header at all: skip the header buffer. */
            out_num--;
            out_sg++;
            len += hdr_len;
        } else if (n->mergeable_rx_bufs) {
            /* The peer expects a plain struct virtio_net_hdr: trim the
             * extra bytes of the mergeable header off the first buffer. */
            hdr_len -= sizeof(struct virtio_net_hdr);
            out_sg->iov_len -= hdr_len;
            len += hdr_len;
        }

        ret = qemu_sendv_packet_async(&n->nic->nc, out_sg, out_num,
                                      virtio_net_tx_complete);
        if (ret == 0) {
            virtio_queue_set_notification(n->tx_vq, 0);
            n->async_tx.elem = elem;
            n->async_tx.len  = len;
            return -EBUSY;
        }

        len += ret;

        virtqueue_push(vq, &elem, 0);
        virtio_notify(&n->vdev, vq);

        sent++;
        if (sent >= n->tx_burst) {
            break;
        }
    }

    return sent;
}

768
/*
 * TX virtqueue handler for the timer strategy.  The first kick arms the
 * timer and disables queue notifications; a kick arriving while the timer
 * is pending cancels it and flushes the queue immediately.
 */
static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = to_virtio_net(vdev);

    /* This happens when device was stopped but VCPU wasn't. */
    if (!n->vdev.vm_running) {
        n->tx_waiting = 1;
        return;
    }

    if (!n->tx_waiting) {
        qemu_mod_timer(n->tx_timer,
                       qemu_get_clock_ns(vm_clock) + n->tx_timeout);
        n->tx_waiting = 1;
        virtio_queue_set_notification(vq, 0);
        return;
    }

    virtio_queue_set_notification(vq, 1);
    qemu_del_timer(n->tx_timer);
    n->tx_waiting = 0;
    virtio_net_flush_tx(n, vq);
}

791 792 793 794 795 796 797
/*
 * TX virtqueue handler for the bottom-half strategy: mark a flush as
 * pending and, if the VM is running, disable queue notifications and
 * schedule the bottom half.
 */
static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = to_virtio_net(vdev);

    if (unlikely(n->tx_waiting)) {
        return;
    }
    n->tx_waiting = 1;

    /* When the device was stopped but the VCPU wasn't, only the pending
     * flag is recorded. */
    if (n->vdev.vm_running) {
        virtio_queue_set_notification(vq, 0);
        qemu_bh_schedule(n->tx_bh);
    }
}

A
aliguori 已提交
807 808 809
static void virtio_net_tx_timer(void *opaque)
{
    VirtIONet *n = opaque;
810
    assert(n->vdev.vm_running);
A
aliguori 已提交
811

812
    n->tx_waiting = 0;
A
aliguori 已提交
813 814 815 816 817 818 819 820 821

    /* Just in case the driver is not ready on more */
    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
        return;

    virtio_queue_set_notification(n->tx_vq, 1);
    virtio_net_flush_tx(n, n->tx_vq);
}

822 823 824 825 826
/*
 * TX bottom half: flush the TX queue and decide whether to run again.
 */
static void virtio_net_tx_bh(void *opaque)
{
    VirtIONet *net = opaque;
    int32_t flushed;

    assert(net->vdev.vm_running);

    net->tx_waiting = 0;

    /* Just in case the driver is not ready any more */
    if (unlikely(!(net->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))) {
        return;
    }

    flushed = virtio_net_flush_tx(net, net->tx_vq);
    if (flushed == -EBUSY) {
        return; /* Notification re-enable handled by tx_complete */
    }

    if (flushed < net->tx_burst) {
        /* Less than a full burst: re-enable notification and flush
         * anything that may have come in while we weren't looking.  Only
         * if something was found is the guest assumed to be still active
         * and the bottom half rescheduled. */
        virtio_queue_set_notification(net->tx_vq, 1);
        if (virtio_net_flush_tx(net, net->tx_vq) <= 0) {
            return;
        }
        virtio_queue_set_notification(net->tx_vq, 0);
    }

    /* Either a full burst was flushed (assume more is coming) or the
     * second flush found packets: run again. */
    qemu_bh_schedule(net->tx_bh);
    net->tx_waiting = 1;
}

A
aliguori 已提交
859 860 861 862
/*
 * Write the device state to the migration stream @f.  The fields are
 * emitted in a fixed order that the load side reads back, so the sequence
 * below must not be rearranged.
 */
static void virtio_net_save(QEMUFile *f, void *opaque)
{
    VirtIONet *net = opaque;

    /* At this point, backend must be stopped, otherwise
     * it might keep writing to memory. */
    assert(!net->vhost_started);

    /* Generic virtio state first */
    virtio_save(&net->vdev, f);

    /* Basic device state */
    qemu_put_buffer(f, net->mac, ETH_ALEN);
    qemu_put_be32(f, net->tx_waiting);
    qemu_put_be32(f, net->mergeable_rx_bufs);
    qemu_put_be16(f, net->status);
    qemu_put_byte(f, net->promisc);
    qemu_put_byte(f, net->allmulti);

    /* MAC filter table: entry count followed by the entries in use */
    qemu_put_be32(f, net->mac_table.in_use);
    qemu_put_buffer(f, net->mac_table.macs,
                    net->mac_table.in_use * ETH_ALEN);

    /* VLAN bitmap: MAX_VLAN bits */
    qemu_put_buffer(f, (uint8_t *)net->vlans, MAX_VLAN >> 3);

    qemu_put_be32(f, net->has_vnet_hdr);
    qemu_put_byte(f, net->mac_table.multi_overflow);
    qemu_put_byte(f, net->mac_table.uni_overflow);

    /* Remaining receive mode flags */
    qemu_put_byte(f, net->alluni);
    qemu_put_byte(f, net->nomulti);
    qemu_put_byte(f, net->nouni);
    qemu_put_byte(f, net->nobcast);

    qemu_put_byte(f, net->has_ufo);
}

/*
 * Restore virtio-net device state from migration stream @f.
 *
 * @opaque is the VirtIONet registered with register_savevm() and
 * @version_id is the format version of the incoming section.  Fields are
 * read in the order virtio_net_save() writes them; fields introduced in
 * later versions are only read when @version_id is new enough.
 *
 * The stream is untrusted input: the MAC table entry count is validated
 * before it is used as a length.
 *
 * Returns 0 on success, -EINVAL for an unsupported @version_id, the error
 * from virtio_load() if that fails, or -1 when the saved image needs a
 * backend feature (vnet_hdr, UFO) that the current peer lacks.
 */
static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    int i;
    int ret;

    if (version_id < 2 || version_id > VIRTIO_NET_VM_VERSION) {
        return -EINVAL;
    }

    ret = virtio_load(&n->vdev, f);
    if (ret) {
        return ret;
    }

    qemu_get_buffer(f, n->mac, ETH_ALEN);
    n->tx_waiting = qemu_get_be32(f);
    n->mergeable_rx_bufs = qemu_get_be32(f);
    n->guest_hdr_len = n->mergeable_rx_bufs ?
        sizeof(struct virtio_net_hdr_mrg_rxbuf) : sizeof(struct virtio_net_hdr);

    if (version_id >= 3) {
        n->status = qemu_get_be16(f);
    }

    if (version_id >= 4) {
        /* Before version 8 these flags were stored as 32-bit values. */
        if (version_id < 8) {
            n->promisc = qemu_get_be32(f);
            n->allmulti = qemu_get_be32(f);
        } else {
            n->promisc = qemu_get_byte(f);
            n->allmulti = qemu_get_byte(f);
        }
    }

    if (version_id >= 5) {
        /*
         * Keep the count unsigned while validating it.  mac_table.in_use
         * is a signed int, so storing a value >= 0x80000000 there first
         * would make it negative, let it pass the bound check below and
         * turn into a negative length for qemu_get_buffer().
         */
        uint32_t saved_entries = qemu_get_be32(f);

        /* MAC_TABLE_ENTRIES may be different from the saved image */
        if (saved_entries <= MAC_TABLE_ENTRIES) {
            n->mac_table.in_use = saved_entries;
            qemu_get_buffer(f, n->mac_table.macs,
                            n->mac_table.in_use * ETH_ALEN);
        } else {
            /*
             * The saved table does not fit.  Only the overflow flags are
             * needed in that case, so consume and drop the addresses
             * instead of buffering them.  64-bit arithmetic keeps the
             * byte count from wrapping.
             */
            int64_t remaining = (int64_t)saved_entries * ETH_ALEN;

            while (remaining-- > 0) {
                qemu_get_byte(f);
            }
            n->mac_table.multi_overflow = n->mac_table.uni_overflow = 1;
            n->mac_table.in_use = 0;
        }
    }

    if (version_id >= 6) {
        qemu_get_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);
    }

    if (version_id >= 7) {
        if (qemu_get_be32(f) && !peer_has_vnet_hdr(n)) {
            error_report("virtio-net: saved image requires vnet_hdr=on");
            return -1;
        }

        /* NOTE(review): n->has_vnet_hdr is not assigned in this function;
         * presumably peer_has_vnet_hdr() refreshes it - confirm. */
        if (n->has_vnet_hdr) {
            tap_using_vnet_hdr(n->nic->nc.peer, 1);
            n->host_hdr_len = sizeof(struct virtio_net_hdr);
            /* Re-apply the offloads the guest negotiated. */
            tap_set_offload(n->nic->nc.peer,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_TSO6) & 1,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_ECN)  & 1,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_UFO)  & 1);
        }
    }

    if (version_id >= 9) {
        n->mac_table.multi_overflow = qemu_get_byte(f);
        n->mac_table.uni_overflow = qemu_get_byte(f);
    }

    if (version_id >= 10) {
        n->alluni = qemu_get_byte(f);
        n->nomulti = qemu_get_byte(f);
        n->nouni = qemu_get_byte(f);
        n->nobcast = qemu_get_byte(f);
    }

    if (version_id >= 11) {
        if (qemu_get_byte(f) && !peer_has_ufo(n)) {
            error_report("virtio-net: saved image requires TUN_F_UFO support");
            return -1;
        }
    }

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in n->status */
    n->nic->nc.link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;

    return 0;
}

990
/*
 * NetClientInfo.cleanup hook: drop the device's reference to its NIC.
 *
 * @nc is the net client embedded in the NICState whose opaque pointer is
 * the owning VirtIONet.
 */
static void virtio_net_cleanup(NetClientState *nc)
{
    NICState *nic = DO_UPCAST(NICState, nc, nc);
    VirtIONet *n = nic->opaque;

    n->nic = NULL;
}

M
Mark McLoughlin 已提交
997
static NetClientInfo net_virtio_info = {
998
    .type = NET_CLIENT_OPTIONS_KIND_NIC,
M
Mark McLoughlin 已提交
999 1000 1001 1002 1003 1004 1005
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
        .cleanup = virtio_net_cleanup,
    .link_status_changed = virtio_net_set_link_status,
};

1006 1007
/*
 * Create and initialise a virtio-net device.
 *
 * @dev:  the qdev device this virtio-net instance belongs to
 * @conf: NIC configuration (MAC address, boot index, ...)
 * @net:  virtio-net specific options (tx mode, tx timer, tx burst)
 *
 * Sets up the virtio callbacks, creates the rx, tx and control queues
 * (in that order), creates the NIC, allocates the MAC and VLAN filter
 * tables and registers the savevm handlers.
 *
 * Returns the embedded VirtIODevice.
 */
VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
                              virtio_net_conf *net)
{
    VirtIONet *n;
    int tx_is_timer;
    int tx_is_bh;

    n = (VirtIONet *)virtio_common_init("virtio-net", VIRTIO_ID_NET,
                                        sizeof(struct virtio_net_config),
                                        sizeof(VirtIONet));

    /* virtio device callbacks */
    n->vdev.get_config = virtio_net_get_config;
    n->vdev.set_config = virtio_net_set_config;
    n->vdev.get_features = virtio_net_get_features;
    n->vdev.set_features = virtio_net_set_features;
    n->vdev.bad_features = virtio_net_bad_features;
    n->vdev.reset = virtio_net_reset;
    n->vdev.set_status = virtio_net_set_status;

    n->rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);

    /* Classify the requested tx mode once; an unset option means "bh". */
    tx_is_timer = net->tx && !strcmp(net->tx, "timer");
    tx_is_bh = net->tx && !strcmp(net->tx, "bh");

    if (net->tx && !tx_is_timer && !tx_is_bh) {
        error_report("virtio-net: "
                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                     net->tx);
        error_report("Defaulting to \"bh\"");
    }

    if (tx_is_timer) {
        n->tx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_tx_timer);
        n->tx_timer = qemu_new_timer_ns(vm_clock, virtio_net_tx_timer, n);
        n->tx_timeout = net->txtimer;
    } else {
        n->tx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_tx_bh);
        n->tx_bh = qemu_bh_new(virtio_net_tx_bh, n);
    }

    n->ctrl_vq = virtio_add_queue(&n->vdev, 64, virtio_net_handle_ctrl);

    /* Pick a default MAC if none was configured, then adopt it. */
    qemu_macaddr_default_if_unset(&conf->macaddr);
    memcpy(&n->mac[0], &conf->macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;

    n->nic = qemu_new_nic(&net_virtio_info, conf,
                          object_get_typename(OBJECT(dev)), dev->id, n);

    qemu_format_nic_info_str(&n->nic->nc, conf->macaddr.a);

    /* Initial tx/rx state. */
    n->tx_waiting = 0;
    n->tx_burst = net->txburst;
    n->mergeable_rx_bufs = 0;
    n->guest_hdr_len = sizeof(struct virtio_net_hdr);
    n->promisc = 1; /* for compatibility */

    /* Filter tables start out zeroed. */
    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
    n->vlans = g_malloc0(MAX_VLAN >> 3);

    n->qdev = dev;
    register_savevm(dev, "virtio-net", -1, VIRTIO_NET_VM_VERSION,
                    virtio_net_save, virtio_net_load, n);

    add_boot_device_path(conf->bootindex, dev, "/ethernet-phy@0");

    return &n->vdev;
}
1066 1067 1068 1069

/*
 * Tear down a virtio-net device created by virtio_net_init().
 *
 * @vdev is the VirtIODevice embedded in the VirtIONet being destroyed.
 * Releases the savevm registration, the filter tables, the tx timer or
 * bottom half, the net client and finally the virtio device itself.
 */
void virtio_net_exit(VirtIODevice *vdev)
{
    VirtIONet *n = DO_UPCAST(VirtIONet, vdev, vdev);

    /* Clearing the status stops the vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    qemu_purge_queued_packets(&n->nic->nc);

    unregister_savevm(n->qdev, "virtio-net", n);

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    /* With no tx timer present, the tx bottom half is what gets deleted. */
    if (!n->tx_timer) {
        qemu_bh_delete(n->tx_bh);
    } else {
        qemu_del_timer(n->tx_timer);
        qemu_free_timer(n->tx_timer);
    }

    qemu_del_net_client(&n->nic->nc);
    virtio_cleanup(&n->vdev);
}