virtio-net.c 28.5 KB
Newer Older
A
aliguori 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

14
#include "iov.h"
A
aliguori 已提交
15 16
#include "virtio.h"
#include "net.h"
17
#include "net/checksum.h"
18
#include "net/tap.h"
19
#include "qemu-error.h"
A
aliguori 已提交
20 21
#include "qemu-timer.h"
#include "virtio-net.h"
22
#include "vhost_net.h"
A
aliguori 已提交
23

24
#define VIRTIO_NET_VM_VERSION    11
25

26
#define MAC_TABLE_ENTRIES    64
27
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
28

A
aliguori 已提交
29 30 31
typedef struct VirtIONet
{
    VirtIODevice vdev;
32
    uint8_t mac[ETH_ALEN];
33
    uint16_t status;
A
aliguori 已提交
34 35
    VirtQueue *rx_vq;
    VirtQueue *tx_vq;
36
    VirtQueue *ctrl_vq;
M
Mark McLoughlin 已提交
37
    NICState *nic;
A
aliguori 已提交
38 39
    QEMUTimer *tx_timer;
    int tx_timer_active;
M
Mark McLoughlin 已提交
40
    uint32_t has_vnet_hdr;
41
    uint8_t has_ufo;
42 43 44 45
    struct {
        VirtQueueElement elem;
        ssize_t len;
    } async_tx;
A
aliguori 已提交
46
    int mergeable_rx_bufs;
47 48
    uint8_t promisc;
    uint8_t allmulti;
49 50 51 52
    uint8_t alluni;
    uint8_t nomulti;
    uint8_t nouni;
    uint8_t nobcast;
53 54
    uint8_t vhost_started;
    VMChangeStateEntry *vmstate;
55 56
    struct {
        int in_use;
57
        int first_multi;
58 59
        uint8_t multi_overflow;
        uint8_t uni_overflow;
60 61
        uint8_t *macs;
    } mac_table;
62
    uint32_t *vlans;
63
    DeviceState *qdev;
A
aliguori 已提交
64 65 66 67 68 69 70 71 72 73 74
} VirtIONet;

/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

/* Convert the generic device pointer to the enclosing VirtIONet.
 * Valid because vdev is the first member of VirtIONet. */
static VirtIONet *to_virtio_net(VirtIODevice *vdev)
{
    VirtIONet *n = (VirtIONet *)vdev;

    return n;
}

75
/* Fill the guest-visible config space from the device's MAC and status. */
static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    struct virtio_net_config cfg;
    VirtIONet *n = to_virtio_net(vdev);

    memcpy(cfg.mac, n->mac, ETH_ALEN);
    cfg.status = n->status;

    memcpy(config, &cfg, sizeof(cfg));
}

85 86 87 88 89 90 91
/* Apply a guest write to the config space.  Only the MAC address is
 * taken over; the NIC info string is refreshed when it changes. */
static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    struct virtio_net_config cfg;
    VirtIONet *n = to_virtio_net(vdev);

    memcpy(&cfg, config, sizeof(cfg));

    if (memcmp(cfg.mac, n->mac, ETH_ALEN) == 0) {
        /* MAC unchanged: nothing to do. */
        return;
    }

    memcpy(n->mac, cfg.mac, ETH_ALEN);
    qemu_format_nic_info_str(&n->nic->nc, n->mac);
}

M
Mark McLoughlin 已提交
98
/* Link-status callback: mirror nc->link_down into VIRTIO_NET_S_LINK_UP
 * and raise a config-change notification when the status changed. */
static void virtio_net_set_link_status(VLANClientState *nc)
{
    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
    uint16_t previous = n->status;

    if (nc->link_down) {
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    } else {
        n->status |= VIRTIO_NET_S_LINK_UP;
    }

    if (previous != n->status) {
        virtio_notify_config(&n->vdev);
    }
}

112 113 114 115 116 117 118
/* Device reset: return to promiscuous "compatibility" receive mode,
 * stop a running vhost backend and empty the MAC and VLAN filters. */
static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = to_virtio_net(vdev);

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;

    if (n->vhost_started) {
        vhost_net_stop(tap_get_vhost_net(n->nic->nc.peer), vdev);
        n->vhost_started = 0;
    }

    /* Flush any MAC filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);

    /* Flush the VLAN bitmap: MAX_VLAN bits == MAX_VLAN >> 3 bytes */
    memset(n->vlans, 0, MAX_VLAN >> 3);
}

M
Mark McLoughlin 已提交
137 138
/* Return non-zero when the peer is a tap device that supports the vnet
 * header.  The result is cached in n->has_vnet_hdr; the cache is left
 * untouched when there is no peer or the peer is not a tap. */
static int peer_has_vnet_hdr(VirtIONet *n)
{
    if (!n->nic->nc.peer ||
        n->nic->nc.peer->info->type != NET_CLIENT_TYPE_TAP) {
        return 0;
    }

    n->has_vnet_hdr = tap_has_vnet_hdr(n->nic->nc.peer);

    return n->has_vnet_hdr;
}

150 151 152 153 154
/* Return non-zero when the peer supports the vnet header and UFO.
 * The tap's answer is cached in n->has_ufo. */
static int peer_has_ufo(VirtIONet *n)
{
    if (peer_has_vnet_hdr(n)) {
        n->has_ufo = tap_has_ufo(n->nic->nc.peer);
        return n->has_ufo;
    }

    return 0;
}

160
/* Compute the feature bits offered to the guest.
 *
 * VIRTIO_NET_F_MAC is always offered.  Checksum/TSO/ECN offloads are
 * removed when the peer has no vnet header, UFO is removed when the peer
 * lacks UFO, and a vhost backend (if the tap peer has one) gets the final
 * say through vhost_net_get_features(). */
static uint32_t virtio_net_get_features(VirtIODevice *vdev, uint32_t features)
{
    VirtIONet *n = to_virtio_net(vdev);

    features |= (1 << VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        /* No vnet header: none of the host or guest offloads can work. */
        features &= ~((0x1 << VIRTIO_NET_F_CSUM) |
                      (0x1 << VIRTIO_NET_F_HOST_TSO4) |
                      (0x1 << VIRTIO_NET_F_HOST_TSO6) |
                      (0x1 << VIRTIO_NET_F_HOST_ECN));

        features &= ~((0x1 << VIRTIO_NET_F_GUEST_CSUM) |
                      (0x1 << VIRTIO_NET_F_GUEST_TSO4) |
                      (0x1 << VIRTIO_NET_F_GUEST_TSO6) |
                      (0x1 << VIRTIO_NET_F_GUEST_ECN));
    } else {
        tap_using_vnet_hdr(n->nic->nc.peer, 1);
    }

    if (!(peer_has_vnet_hdr(n) && peer_has_ufo(n))) {
        features &= ~((0x1 << VIRTIO_NET_F_GUEST_UFO) |
                      (0x1 << VIRTIO_NET_F_HOST_UFO));
    }

    if (n->nic->nc.peer &&
        n->nic->nc.peer->info->type == NET_CLIENT_TYPE_TAP &&
        tap_get_vhost_net(n->nic->nc.peer)) {
        return vhost_net_get_features(tap_get_vhost_net(n->nic->nc.peer),
                                      features);
    }

    return features;
}

195 196 197 198 199 200 201
/* Feature set reported through the bad_features hook.
 *
 * Linux kernel 2.6.25.  It understood MAC (as everyone must),
 * but also CSUM and the HOST_TSO4/TSO6/ECN offloads. */
static uint32_t virtio_net_bad_features(VirtIODevice *vdev)
{
    return (1 << VIRTIO_NET_F_MAC) |
           (1 << VIRTIO_NET_F_CSUM) |
           (1 << VIRTIO_NET_F_HOST_TSO4) |
           (1 << VIRTIO_NET_F_HOST_TSO6) |
           (1 << VIRTIO_NET_F_HOST_ECN);
}

A
aliguori 已提交
210 211 212 213 214
/* Record the features acknowledged by the guest: remember whether
 * mergeable RX buffers are in use, program the tap offloads when the vnet
 * header is available, and forward the features to a vhost backend if the
 * tap peer has one. */
static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features)
{
    VirtIONet *n = to_virtio_net(vdev);

    n->mergeable_rx_bufs = !!(features & (1 << VIRTIO_NET_F_MRG_RXBUF));

    if (n->has_vnet_hdr) {
        tap_set_offload(n->nic->nc.peer,
                        (features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
                        (features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
                        (features >> VIRTIO_NET_F_GUEST_TSO6) & 1,
                        (features >> VIRTIO_NET_F_GUEST_ECN)  & 1,
                        (features >> VIRTIO_NET_F_GUEST_UFO)  & 1);
    }

    if (n->nic->nc.peer &&
        n->nic->nc.peer->info->type == NET_CLIENT_TYPE_TAP &&
        tap_get_vhost_net(n->nic->nc.peer)) {
        vhost_net_ack_features(tap_get_vhost_net(n->nic->nc.peer), features);
    }
}

234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249
/* Handle a VIRTIO_NET_CTRL_RX_MODE command from the control queue.
 *
 * The request must consist of exactly two out buffers, the second being a
 * single byte that switches the selected receive mode on or off.
 *
 * Returns VIRTIO_NET_OK on success, or VIRTIO_NET_ERR for a malformed
 * request or an unknown command.  A malformed request used to terminate
 * the whole process with exit(1); it is now reported to the guest as an
 * error, matching virtio_net_handle_vlan_table(). */
static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     VirtQueueElement *elem)
{
    uint8_t on;

    if (elem->out_num != 2 || elem->out_sg[1].iov_len != sizeof(on)) {
        fprintf(stderr, "virtio-net ctrl invalid rx mode command\n");
        return VIRTIO_NET_ERR;
    }

    on = ldub_p(elem->out_sg[1].iov_base);

    switch (cmd) {
    case VIRTIO_NET_CTRL_RX_MODE_PROMISC:
        n->promisc = on;
        break;
    case VIRTIO_NET_CTRL_RX_MODE_ALLMULTI:
        n->allmulti = on;
        break;
    case VIRTIO_NET_CTRL_RX_MODE_ALLUNI:
        n->alluni = on;
        break;
    case VIRTIO_NET_CTRL_RX_MODE_NOMULTI:
        n->nomulti = on;
        break;
    case VIRTIO_NET_CTRL_RX_MODE_NOUNI:
        n->nouni = on;
        break;
    case VIRTIO_NET_CTRL_RX_MODE_NOBCAST:
        n->nobcast = on;
        break;
    default:
        return VIRTIO_NET_ERR;
    }

    return VIRTIO_NET_OK;
}

264 265 266 267 268 269 270 271 272 273 274
/* Handle VIRTIO_NET_CTRL_MAC_TABLE_SET: replace the MAC filter table.
 *
 * The request carries two tables, unicast in out_sg[1] and multicast in
 * out_sg[2], each a little-endian 32-bit entry count followed by that many
 * ETH_ALEN-byte addresses.  A table that does not fit into
 * MAC_TABLE_ENTRIES is not stored; the matching overflow flag is set
 * instead.
 *
 * The filter is flushed before the tables are validated, so a request
 * rejected for a bad entry count leaves the filter empty.
 *
 * Returns VIRTIO_NET_OK, or VIRTIO_NET_ERR for a malformed request.
 *
 * The entry counts come from the guest.  All bounds checks are written as
 * comparisons against a quotient or a difference so that no multiplication
 * or addition on the untrusted count can wrap around. */
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 VirtQueueElement *elem)
{
    struct virtio_net_ctrl_mac mac_data;

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET || elem->out_num != 3 ||
        elem->out_sg[1].iov_len < sizeof(mac_data) ||
        elem->out_sg[2].iov_len < sizeof(mac_data))
        return VIRTIO_NET_ERR;

    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.uni_overflow = 0;
    n->mac_table.multi_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);

    /* Unicast table */
    mac_data.entries = ldl_le_p(elem->out_sg[1].iov_base);

    /* Same test as "sizeof(entries) + entries * ETH_ALEN > iov_len",
     * but immune to 32-bit wrap-around of the product. */
    if (mac_data.entries >
        (elem->out_sg[1].iov_len - sizeof(mac_data.entries)) / ETH_ALEN)
        return VIRTIO_NET_ERR;

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        memcpy(n->mac_table.macs,
               (uint8_t *)elem->out_sg[1].iov_base + sizeof(mac_data),
               mac_data.entries * ETH_ALEN);
        n->mac_table.in_use += mac_data.entries;
    } else {
        n->mac_table.uni_overflow = 1;
    }

    n->mac_table.first_multi = n->mac_table.in_use;

    /* Multicast table */
    mac_data.entries = ldl_le_p(elem->out_sg[2].iov_base);

    if (mac_data.entries >
        (elem->out_sg[2].iov_len - sizeof(mac_data.entries)) / ETH_ALEN)
        return VIRTIO_NET_ERR;

    if (mac_data.entries) {
        /* in_use is at most MAC_TABLE_ENTRIES here, so the difference
         * cannot be negative. */
        if (mac_data.entries <=
            (uint32_t)(MAC_TABLE_ENTRIES - n->mac_table.in_use)) {
            memcpy(n->mac_table.macs + (n->mac_table.in_use * ETH_ALEN),
                   (uint8_t *)elem->out_sg[2].iov_base + sizeof(mac_data),
                   mac_data.entries * ETH_ALEN);
            n->mac_table.in_use += mac_data.entries;
        } else {
            n->mac_table.multi_overflow = 1;
        }
    }

    return VIRTIO_NET_OK;
}

316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340
/* Handle a VIRTIO_NET_CTRL_VLAN command: add or remove one VLAN id in the
 * filter bitmap.  The id is a little-endian 16-bit value in out_sg[1].
 * Returns VIRTIO_NET_OK, or VIRTIO_NET_ERR for a malformed request, an id
 * outside [0, MAX_VLAN) or an unknown command. */
static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        VirtQueueElement *elem)
{
    uint16_t vid;
    uint32_t bit;
    int word;

    if (elem->out_num != 2 || elem->out_sg[1].iov_len != sizeof(vid)) {
        fprintf(stderr, "virtio-net ctrl invalid vlan command\n");
        return VIRTIO_NET_ERR;
    }

    vid = lduw_le_p(elem->out_sg[1].iov_base);
    if (vid >= MAX_VLAN) {
        return VIRTIO_NET_ERR;
    }

    /* The bitmap is an array of 32-bit words. */
    word = vid >> 5;
    bit = 1U << (vid & 0x1f);

    switch (cmd) {
    case VIRTIO_NET_CTRL_VLAN_ADD:
        n->vlans[word] |= bit;
        break;
    case VIRTIO_NET_CTRL_VLAN_DEL:
        n->vlans[word] &= ~bit;
        break;
    default:
        return VIRTIO_NET_ERR;
    }

    return VIRTIO_NET_OK;
}

341 342
/* Control-queue handler: pop every pending request, dispatch it by class
 * and write a one-byte ack into the last in buffer.
 *
 * The ack is reset to VIRTIO_NET_ERR for every request.  It used to be
 * initialised once before the loop, so a request with an unknown class
 * was acknowledged with whatever status the previous request in the same
 * batch had produced. */
static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = to_virtio_net(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status;
    VirtQueueElement elem;

    while (virtqueue_pop(vq, &elem)) {
        if ((elem.in_num < 1) || (elem.out_num < 1)) {
            fprintf(stderr, "virtio-net ctrl missing headers\n");
            exit(1);
        }

        if (elem.out_sg[0].iov_len < sizeof(ctrl) ||
            elem.in_sg[elem.in_num - 1].iov_len < sizeof(status)) {
            fprintf(stderr, "virtio-net ctrl header not in correct element\n");
            exit(1);
        }

        ctrl.class = ldub_p(elem.out_sg[0].iov_base);
        ctrl.cmd = ldub_p((uint8_t *)elem.out_sg[0].iov_base +
                          sizeof(ctrl.class));

        /* Unknown classes are rejected. */
        status = VIRTIO_NET_ERR;

        if (ctrl.class == VIRTIO_NET_CTRL_RX_MODE)
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, &elem);
        else if (ctrl.class == VIRTIO_NET_CTRL_MAC)
            status = virtio_net_handle_mac(n, ctrl.cmd, &elem);
        else if (ctrl.class == VIRTIO_NET_CTRL_VLAN)
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, &elem);

        stb_p(elem.in_sg[elem.in_num - 1].iov_base, status);

        virtqueue_push(vq, &elem, sizeof(status));
        virtio_notify(vdev, vq);
    }
}

A
aliguori 已提交
377 378 379 380
/* RX */

/* RX queue kick: the guest has posted receive buffers. */
static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = to_virtio_net(vdev);

    /* Deliver packets that were queued while no buffers were available. */
    qemu_flush_queued_packets(&n->nic->nc);

    /* We now have RX buffers, signal to the IO thread to break out of the
     * select to re-poll the tap file descriptor */
    qemu_notify_event();
}

M
Mark McLoughlin 已提交
390
/* Return 1 when the RX queue is ready and the driver has set DRIVER_OK,
 * 0 otherwise. */
static int virtio_net_can_receive(VLANClientState *nc)
{
    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;

    if (!virtio_queue_ready(n->rx_vq)) {
        return 0;
    }
    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return 0;
    }

    return 1;
}

/* Return 1 when the RX queue can take a packet of bufsize bytes, else 0.
 *
 * With mergeable RX buffers the queue must offer bufsize bytes in total;
 * otherwise a non-empty queue is enough.  Guest notifications are enabled
 * when buffers are missing and disabled again once they are available. */
static int virtio_net_has_buffers(VirtIONet *n, int bufsize)
{
    if (virtio_queue_empty(n->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(n->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(n->rx_vq, 1);

        /* The guest may have added buffers between the check above and
         * the moment notifications were switched on, so look again
         * before giving up.
         */
        if (virtio_queue_empty(n->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(n->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(n->rx_vq, 0);
    return 1;
}

A
Anthony Liguori 已提交
422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449
/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 *
 * hdr:  vnet header of the packet; NEEDS_CSUM is cleared when the checksum
 *       has been filled in.
 * buf:  start of the Ethernet frame (modified in place despite the const).
 * size: number of bytes available at buf.
 *
 * The frame must be long enough to hold Ethernet, IPv4 and UDP headers
 * before any of their fields are inspected.  The previous lower bound of
 * "size > 27" still allowed buf[34] and buf[35] to be read past the end of
 * a short frame.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
                                        const uint8_t *buf, size_t size)
{
    /* Ethernet (14) + option-less IPv4 (20) + UDP (8) header bytes */
    const size_t min_size = 14 + 20 + 8;

    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size >= min_size && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
        /* FIXME this cast is evil */
        net_checksum_calculate((uint8_t *)buf, size);
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}

A
aliguori 已提交
450
/* Write the virtio-net header for an incoming packet into the first guest
 * buffer and advance iov[0] past the guest header of hdr_len bytes.
 *
 * When the peer supplies a vnet header it is copied from the start of buf;
 * otherwise the header carries no flags and no GSO.
 *
 * Returns the number of bytes consumed from buf (0 or the size of
 * struct virtio_net_hdr). */
static int receive_header(VirtIONet *n, struct iovec *iov, int iovcnt,
                          const void *buf, size_t size, size_t hdr_len)
{
    struct virtio_net_hdr *guest_hdr = iov[0].iov_base;
    int consumed = 0;

    guest_hdr->flags = 0;
    guest_hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;

    if (n->has_vnet_hdr) {
        memcpy(guest_hdr, buf, sizeof(*guest_hdr));
        consumed = sizeof(*guest_hdr);
        work_around_broken_dhclient(guest_hdr,
                                    (const uint8_t *)buf + consumed,
                                    size - consumed);
    }

    /* We only ever receive a struct virtio_net_hdr from the tapfd,
     * but we may be passing along a larger header to the guest.
     */
    iov[0].iov_base = (uint8_t *)iov[0].iov_base + hdr_len;
    iov[0].iov_len -= hdr_len;

    return consumed;
}

474 475 476
/* Decide whether an incoming frame is delivered to the guest.
 * Returns 1 to accept the frame and 0 to drop it.
 *
 * Order of checks: promiscuous mode accepts everything; a VLAN-tagged
 * frame must have its id set in the VLAN bitmap; then the destination
 * address is matched against the broadcast/multicast or unicast rules and
 * the corresponding part of the MAC table. */
static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *ptr = (uint8_t *)buf;
    int first, last, i;

    if (n->promisc) {
        return 1;
    }

    /* Skip the vnet header that precedes the Ethernet frame. */
    if (n->has_vnet_hdr) {
        ptr += sizeof(struct virtio_net_hdr);
    }

    if (memcmp(&ptr[12], vlan, sizeof(vlan)) == 0) {
        int vid = be16_to_cpup((uint16_t *)(ptr + 14)) & 0xfff;

        if ((n->vlans[vid >> 5] & (1U << (vid & 0x1f))) == 0) {
            return 0;
        }
    }

    if (ptr[0] & 1) {
        /* Group address: broadcast or multicast. */
        if (memcmp(ptr, bcast, sizeof(bcast)) == 0) {
            return !n->nobcast;
        }
        if (n->nomulti) {
            return 0;
        }
        if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        /* Multicast entries occupy the tail of the table. */
        first = n->mac_table.first_multi;
        last = n->mac_table.in_use;
    } else {
        /* Unicast address. */
        if (n->nouni) {
            return 0;
        }
        if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        }
        if (memcmp(ptr, n->mac, ETH_ALEN) == 0) {
            return 1;
        }

        /* Unicast entries occupy the head of the table. */
        first = 0;
        last = n->mac_table.first_multi;
    }

    for (i = first; i < last; i++) {
        if (memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN) == 0) {
            return 1;
        }
    }

    return 0;
}

M
Mark McLoughlin 已提交
527
/* Receive callback: copy one packet from the network backend into guest
 * RX buffers.
 *
 * Returns -1 when the device cannot receive or no buffer could be popped
 * for the first fragment, 0 when the guest has not posted enough buffers,
 * and size when the packet was delivered, filtered out, or dropped because
 * it did not fit a single non-mergeable buffer. */
static ssize_t virtio_net_receive(VLANClientState *nc, const uint8_t *buf, size_t size)
{
    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
    struct virtio_net_hdr_mrg_rxbuf *mhdr = NULL;
    size_t guest_hdr_len, host_hdr_len;
    size_t offset = 0;
    size_t filled = 0;   /* number of guest buffers used so far */

    if (!virtio_net_can_receive(&n->nic->nc)) {
        return -1;
    }

    /* guest_hdr_len refers to the header we supply to the guest */
    if (n->mergeable_rx_bufs) {
        guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
    } else {
        guest_hdr_len = sizeof(struct virtio_net_hdr);
    }

    /* host_hdr_len is the vnet header already present at the start of buf */
    if (n->has_vnet_hdr) {
        host_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        host_hdr_len = 0;
    }

    if (!virtio_net_has_buffers(n, size + guest_hdr_len - host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size)) {
        return size;
    }

    while (offset < size) {
        VirtQueueElement elem;
        struct iovec sg[VIRTQUEUE_MAX_SIZE];
        int len;
        int total = 0;

        if (virtqueue_pop(n->rx_vq, &elem) == 0) {
            if (filled == 0) {
                return -1;
            }
            fprintf(stderr, "virtio-net unexpected empty queue: "
                    "i %zd mergeable %d offset %zd, size %zd, "
                    "guest hdr len %zd, host hdr len %zd guest features 0x%x\n",
                    filled, n->mergeable_rx_bufs, offset, size,
                    guest_hdr_len, host_hdr_len, n->vdev.guest_features);
            exit(1);
        }

        if (elem.in_num < 1) {
            fprintf(stderr, "virtio-net receive queue contains no in buffers\n");
            exit(1);
        }

        if (!n->mergeable_rx_bufs && elem.in_sg[0].iov_len != guest_hdr_len) {
            fprintf(stderr, "virtio-net header not in first element\n");
            exit(1);
        }

        /* Work on a copy: receive_header() advances sg[0]. */
        memcpy(&sg, &elem.in_sg[0], sizeof(sg[0]) * elem.in_num);

        if (filled == 0) {
            if (n->mergeable_rx_bufs) {
                mhdr = (struct virtio_net_hdr_mrg_rxbuf *)sg[0].iov_base;
            }

            offset += receive_header(n, sg, elem.in_num,
                                     buf + offset, size - offset,
                                     guest_hdr_len);
            total += guest_hdr_len;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem.in_num, buf + offset, size - offset);
        total += len;
        offset += len;

        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
#if 0
            fprintf(stderr, "virtio-net truncated non-mergeable packet: "

                    "i %zd mergeable %d offset %zd, size %zd, "
                    "guest hdr len %zd, host hdr len %zd\n",
                    filled, n->mergeable_rx_bufs,
                    offset, size, guest_hdr_len, host_hdr_len);
#endif
            return size;
        }

        /* signal other side */
        virtqueue_fill(n->rx_vq, &elem, total, filled);
        filled++;
    }

    if (mhdr) {
        mhdr->num_buffers = filled;
    }

    virtqueue_flush(n->rx_vq, filled);
    virtio_notify(&n->vdev, n->rx_vq);

    return size;
}

622 623
static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq);

M
Mark McLoughlin 已提交
624
/* Completion callback for an asynchronous send started in
 * virtio_net_flush_tx(): return the pending element to the guest, clear
 * the pending state and resume flushing the TX queue. */
static void virtio_net_tx_complete(VLANClientState *nc, ssize_t len)
{
    VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;

    virtqueue_push(n->tx_vq, &n->async_tx.elem, n->async_tx.len);
    virtio_notify(&n->vdev, n->tx_vq);

    /* out_num == 0 marks "no send in flight". */
    n->async_tx.len = 0;
    n->async_tx.elem.out_num = 0;

    virtio_queue_set_notification(n->tx_vq, 1);
    virtio_net_flush_tx(n, n->tx_vq);
}

A
aliguori 已提交
637 638 639 640 641 642 643 644
/* TX */
/* Send every packet currently queued on the TX virtqueue.
 *
 * Nothing happens until the driver has set DRIVER_OK.  While an
 * asynchronous send is in flight the queue is left alone with
 * notifications disabled; virtio_net_tx_complete() resumes the flush. */
static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
{
    VirtQueueElement elem;

    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return;
    }

    if (n->async_tx.elem.out_num) {
        virtio_queue_set_notification(n->tx_vq, 0);
        return;
    }

    while (virtqueue_pop(vq, &elem)) {
        struct iovec *out_sg = &elem.out_sg[0];
        unsigned int out_num = elem.out_num;
        unsigned hdr_len;
        ssize_t len = 0;
        ssize_t ret;

        /* hdr_len refers to the header received from the guest */
        if (n->mergeable_rx_bufs) {
            hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
        } else {
            hdr_len = sizeof(struct virtio_net_hdr);
        }

        if (out_num < 1 || out_sg->iov_len != hdr_len) {
            fprintf(stderr, "virtio-net header not in first element\n");
            exit(1);
        }

        if (!n->has_vnet_hdr) {
            /* ignore the header if GSO is not supported */
            out_num--;
            out_sg++;
            len += hdr_len;
        } else if (n->mergeable_rx_bufs) {
            /* tapfd expects a struct virtio_net_hdr: trim the extra
             * bytes of the mergeable header off the first buffer */
            hdr_len -= sizeof(struct virtio_net_hdr);
            out_sg->iov_len -= hdr_len;
            len += hdr_len;
        }

        ret = qemu_sendv_packet_async(&n->nic->nc, out_sg, out_num,
                                      virtio_net_tx_complete);
        if (ret == 0) {
            /* Send is pending: park the element until completion. */
            virtio_queue_set_notification(n->tx_vq, 0);
            n->async_tx.elem = elem;
            n->async_tx.len  = len;
            return;
        }

        len += ret;

        virtqueue_push(vq, &elem, len);
        virtio_notify(&n->vdev, vq);
    }
}

/* TX queue kick.  The first kick arms the TX timer and disables further
 * notifications; a kick that arrives while the timer is armed cancels it
 * and flushes the queue immediately. */
static void virtio_net_handle_tx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = to_virtio_net(vdev);

    if (!n->tx_timer_active) {
        qemu_mod_timer(n->tx_timer,
                       qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL);
        n->tx_timer_active = 1;
        virtio_queue_set_notification(vq, 0);
        return;
    }

    virtio_queue_set_notification(vq, 1);
    qemu_del_timer(n->tx_timer);
    n->tx_timer_active = 0;
    virtio_net_flush_tx(n, vq);
}

/* TX timer expiry: re-enable notifications and flush the TX queue. */
static void virtio_net_tx_timer(void *opaque)
{
    VirtIONet *n = opaque;

    n->tx_timer_active = 0;

    /* Just in case the driver is not ready any more */
    if ((n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK) == 0) {
        return;
    }

    virtio_queue_set_notification(n->tx_vq, 1);
    virtio_net_flush_tx(n, n->tx_vq);
}

/* Write the device state to the migration stream.  The field order below
 * is the wire format and must match virtio_net_load(). */
static void virtio_net_save(QEMUFile *f, void *opaque)
{
    VirtIONet *n = opaque;

    if (n->vhost_started) {
        /* TODO: should we really stop the backend?
         * If we don't, it might keep writing to memory. */
        vhost_net_stop(tap_get_vhost_net(n->nic->nc.peer), &n->vdev);
        n->vhost_started = 0;
    }

    /* Generic virtio state comes first. */
    virtio_save(&n->vdev, f);

    qemu_put_buffer(f, n->mac, ETH_ALEN);
    qemu_put_be32(f, n->tx_timer_active);
    qemu_put_be32(f, n->mergeable_rx_bufs);
    qemu_put_be16(f, n->status);
    qemu_put_byte(f, n->promisc);
    qemu_put_byte(f, n->allmulti);

    /* MAC filter: entry count followed by the used entries only. */
    qemu_put_be32(f, n->mac_table.in_use);
    qemu_put_buffer(f, n->mac_table.macs, n->mac_table.in_use * ETH_ALEN);

    /* VLAN bitmap: MAX_VLAN bits. */
    qemu_put_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);

    qemu_put_be32(f, n->has_vnet_hdr);
    qemu_put_byte(f, n->mac_table.multi_overflow);
    qemu_put_byte(f, n->mac_table.uni_overflow);
    qemu_put_byte(f, n->alluni);
    qemu_put_byte(f, n->nomulti);
    qemu_put_byte(f, n->nouni);
    qemu_put_byte(f, n->nobcast);
    qemu_put_byte(f, n->has_ufo);
}

/* Restore the device state from a migration stream written by
 * virtio_net_save().
 *
 * Stream versions 2 through VIRTIO_NET_VM_VERSION are accepted; fields
 * added in later versions are read only when version_id says they exist.
 *
 * Returns 0 on success, -EINVAL for an unsupported version, and -1 when
 * the saved image needs a vnet header or UFO that the current peer does
 * not provide.
 *
 * The MAC table entry count comes from the stream and is untrusted.  It is
 * read as an unsigned value and validated before being stored in the
 * signed in_use field.  Previously a count of 2^31 or more became negative,
 * passed the "<= MAC_TABLE_ENTRIES" test and was used as the length of a
 * read into the MAC table. */
static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    int i;

    if (version_id < 2 || version_id > VIRTIO_NET_VM_VERSION)
        return -EINVAL;

    virtio_load(&n->vdev, f);

    qemu_get_buffer(f, n->mac, ETH_ALEN);
    n->tx_timer_active = qemu_get_be32(f);
    n->mergeable_rx_bufs = qemu_get_be32(f);

    if (version_id >= 3)
        n->status = qemu_get_be16(f);

    if (version_id >= 4) {
        /* Before version 8 these flags were stored as 32-bit values. */
        if (version_id < 8) {
            n->promisc = qemu_get_be32(f);
            n->allmulti = qemu_get_be32(f);
        } else {
            n->promisc = qemu_get_byte(f);
            n->allmulti = qemu_get_byte(f);
        }
    }

    if (version_id >= 5) {
        uint32_t entries = qemu_get_be32(f);

        /* MAC_TABLE_ENTRIES may be different from the saved image */
        if (entries <= MAC_TABLE_ENTRIES) {
            qemu_get_buffer(f, n->mac_table.macs, entries * ETH_ALEN);
            n->mac_table.in_use = entries;
        } else {
            /* Table too large for us: skip it and filter nothing. */
            qemu_fseek(f, (int64_t)entries * ETH_ALEN, SEEK_CUR);
            n->mac_table.multi_overflow = n->mac_table.uni_overflow = 1;
            n->mac_table.in_use = 0;
        }
    }

    if (version_id >= 6)
        qemu_get_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);

    if (version_id >= 7) {
        if (qemu_get_be32(f) && !peer_has_vnet_hdr(n)) {
            error_report("virtio-net: saved image requires vnet_hdr=on");
            return -1;
        }

        if (n->has_vnet_hdr) {
            tap_using_vnet_hdr(n->nic->nc.peer, 1);
            tap_set_offload(n->nic->nc.peer,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_TSO6) & 1,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_ECN)  & 1,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_UFO)  & 1);
        }
    }

    if (version_id >= 9) {
        n->mac_table.multi_overflow = qemu_get_byte(f);
        n->mac_table.uni_overflow = qemu_get_byte(f);
    }

    if (version_id >= 10) {
        n->alluni = qemu_get_byte(f);
        n->nomulti = qemu_get_byte(f);
        n->nouni = qemu_get_byte(f);
        n->nobcast = qemu_get_byte(f);
    }

    if (version_id >= 11) {
        if (qemu_get_byte(f) && !peer_has_ufo(n)) {
            error_report("virtio-net: saved image requires TUN_F_UFO support");
            return -1;
        }
    }

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;

    /* Re-arm the TX timer if it was pending when the state was saved. */
    if (n->tx_timer_active) {
        qemu_mod_timer(n->tx_timer,
                       qemu_get_clock(vm_clock) + TX_TIMER_INTERVAL);
    }
    return 0;
}

M
Mark McLoughlin 已提交
850
/* Net-client cleanup callback: drop the device's reference to its NIC. */
static void virtio_net_cleanup(VLANClientState *nc)
{
    NICState *nic = DO_UPCAST(NICState, nc, nc);
    VirtIONet *n = nic->opaque;

    n->nic = NULL;
}

M
Mark McLoughlin 已提交
857 858 859 860 861 862 863 864 865
/* Net-client callbacks registered for the NIC in virtio_net_init(). */
static NetClientInfo net_virtio_info = {
    .type                = NET_CLIENT_TYPE_NIC,
    .size                = sizeof(NICState),
    .can_receive         = virtio_net_can_receive,
    .receive             = virtio_net_receive,
    .cleanup             = virtio_net_cleanup,
    .link_status_changed = virtio_net_set_link_status,
};

866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898
/* Device-status hook: start the vhost backend when the driver sets
 * DRIVER_OK and stop it when DRIVER_OK is cleared.  Does nothing unless
 * the peer is a tap device with a vhost backend, or when the backend is
 * already in the requested state. */
static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = to_virtio_net(vdev);
    int want_running = !!(status & VIRTIO_CONFIG_S_DRIVER_OK);

    if (!n->nic->nc.peer ||
        n->nic->nc.peer->info->type != NET_CLIENT_TYPE_TAP ||
        !tap_get_vhost_net(n->nic->nc.peer)) {
        return;
    }

    if (!!n->vhost_started == want_running) {
        return;
    }

    if (!want_running) {
        vhost_net_stop(tap_get_vhost_net(n->nic->nc.peer), vdev);
        n->vhost_started = 0;
        return;
    }

    {
        int r = vhost_net_start(tap_get_vhost_net(n->nic->nc.peer), vdev);

        if (r < 0) {
            fprintf(stderr, "unable to start vhost net: %d: "
                    "falling back on userspace virtio\n", -r);
        } else {
            n->vhost_started = 1;
        }
    }
}

static void virtio_net_vmstate_change(void *opaque, int running, int reason)
{
    VirtIONet *n = opaque;
899 900 901 902 903
    uint8_t status = running ? VIRTIO_CONFIG_S_DRIVER_OK : 0;
    /* This is called when vm is started/stopped,
     * it will start/stop vhost backend if * appropriate
     * e.g. after migration. */
    virtio_net_set_status(&n->vdev, n->vdev.status & status);
904 905
}

906
/* Create and initialise a virtio network device for dev using the NIC
 * configuration in conf.  Returns the embedded VirtIODevice. */
VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf)
{
    VirtIONet *n;

    n = (VirtIONet *)virtio_common_init("virtio-net", VIRTIO_ID_NET,
                                        sizeof(struct virtio_net_config),
                                        sizeof(VirtIONet));

    /* Generic virtio device hooks */
    n->vdev.get_config = virtio_net_get_config;
    n->vdev.set_config = virtio_net_set_config;
    n->vdev.get_features = virtio_net_get_features;
    n->vdev.set_features = virtio_net_set_features;
    n->vdev.bad_features = virtio_net_bad_features;
    n->vdev.reset = virtio_net_reset;
    n->vdev.set_status = virtio_net_set_status;

    /* Virtqueues: receive, transmit, control */
    n->rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);
    n->tx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_tx);
    n->ctrl_vq = virtio_add_queue(&n->vdev, 64, virtio_net_handle_ctrl);

    /* MAC address and initial link state */
    qemu_macaddr_default_if_unset(&conf->macaddr);
    memcpy(&n->mac[0], &conf->macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;

    n->nic = qemu_new_nic(&net_virtio_info, conf, dev->info->name, dev->id, n);

    qemu_format_nic_info_str(&n->nic->nc, conf->macaddr.a);

    n->tx_timer = qemu_new_timer(vm_clock, virtio_net_tx_timer, n);
    n->tx_timer_active = 0;
    n->mergeable_rx_bufs = 0;
    n->promisc = 1; /* for compatibility */

    /* Receive filters start out empty (zeroed). */
    n->mac_table.macs = qemu_mallocz(MAC_TABLE_ENTRIES * ETH_ALEN);

    n->vlans = qemu_mallocz(MAX_VLAN >> 3);

    n->qdev = dev;
    register_savevm(dev, "virtio-net", -1, VIRTIO_NET_VM_VERSION,
                    virtio_net_save, virtio_net_load, n);
    n->vmstate = qemu_add_vm_change_state_handler(virtio_net_vmstate_change, n);

    return &n->vdev;
}
948 949 950 951

/* Tear down a device created by virtio_net_init(): unregister the
 * handlers, stop vhost, drop queued packets and release the filter
 * tables, the TX timer, the virtio state and the net client. */
void virtio_net_exit(VirtIODevice *vdev)
{
    VirtIONet *n = DO_UPCAST(VirtIONet, vdev, vdev);

    qemu_del_vm_change_state_handler(n->vmstate);

    if (n->vhost_started) {
        vhost_net_stop(tap_get_vhost_net(n->nic->nc.peer), vdev);
    }

    qemu_purge_queued_packets(&n->nic->nc);

    unregister_savevm(n->qdev, "virtio-net", n);

    /* Receive filter tables */
    qemu_free(n->mac_table.macs);
    qemu_free(n->vlans);

    /* TX timer */
    qemu_del_timer(n->tx_timer);
    qemu_free_timer(n->tx_timer);

    virtio_cleanup(&n->vdev);
    qemu_del_vlan_client(&n->nic->nc);
}