virtio-net.c 41.7 KB
Newer Older
A
aliguori 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

14
#include "qemu/iov.h"
P
Paolo Bonzini 已提交
15
#include "hw/virtio/virtio.h"
P
Paolo Bonzini 已提交
16
#include "net/net.h"
17
#include "net/checksum.h"
18
#include "net/tap.h"
19 20
#include "qemu/error-report.h"
#include "qemu/timer.h"
P
Paolo Bonzini 已提交
21 22
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
23
#include "hw/virtio/virtio-bus.h"
A
aliguori 已提交
24

25
#define VIRTIO_NET_VM_VERSION    11
26

27
#define MAC_TABLE_ENTRIES    64
28
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
29

30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
/*
 * Calculate the number of bytes up to and including the given 'field' of
 * 'container', i.e. the offset of the field plus the field's own size.
 *
 * The null-pointer cast only appears as a sizeof operand, so it is used
 * for its type and is never dereferenced.
 */
#define endof(container, field) \
    (offsetof(container, field) + sizeof(((container *)0)->field))

/*
 * Pairs a feature-bit mask with an end offset into the device config
 * structure (see the feature_sizes table that follows).
 */
typedef struct VirtIOFeature {
    uint32_t flags; /* feature bit mask, e.g. 1 << VIRTIO_NET_F_MAC */
    size_t end;     /* endof() of the last config field tied to 'flags' */
} VirtIOFeature;

/*
 * For each listed feature bit, the end offset of the corresponding field in
 * struct virtio_net_config.  The list is terminated by an all-zero entry.
 * NOTE(review): presumably used to derive the config size from the offered
 * features; the consumer is not visible in this part of the file.
 */
static VirtIOFeature feature_sizes[] = {
    {.flags = 1 << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1 << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1 << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {}
};

J
Jason Wang 已提交
52
/*
 * Return the per-queue state of the device behind 'nc', selected by the
 * client's queue_index.
 */
static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *dev = qemu_get_nic_opaque(nc);

    return dev->vqs + nc->queue_index;
}
J
Jason Wang 已提交
58 59 60 61 62 63

/*
 * Convert a virtqueue index into a queue index: every two consecutive
 * virtqueue indices map onto the same queue.
 */
static int vq2q(int queue_index)
{
    const int vqs_per_queue = 2;

    return queue_index / vqs_per_queue;
}

A
aliguori 已提交
64 65 66 67
/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

68
/*
 * Fill 'config' with the device's current configuration: link status,
 * maximum number of queue pairs and MAC address.  Exactly n->config_size
 * bytes are written to 'config'.
 */
static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;

    /*
     * Start from an all-zero structure so that no uninitialised stack bytes
     * can ever be copied out, even if the structure gains padding or fields
     * that are not explicitly stored below.
     */
    memset(&netcfg, 0, sizeof(netcfg));

    stw_p(&netcfg.status, n->status);
    stw_p(&netcfg.max_virtqueue_pairs, n->max_queues);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    memcpy(config, &netcfg, n->config_size);
}

79 80
/*
 * Accept a config-space write.  Only the MAC address is taken over, and only
 * when VIRTIO_NET_F_CTRL_MAC_ADDR is not set in the guest features and the
 * address actually differs from the current one.
 */
static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};
    bool ctrl_mac_addr;

    memcpy(&netcfg, config, n->config_size);

    ctrl_mac_addr = (vdev->guest_features >> VIRTIO_NET_F_CTRL_MAC_ADDR) & 1;
    if (ctrl_mac_addr) {
        return;
    }

    if (memcmp(netcfg.mac, n->mac, ETH_ALEN) == 0) {
        /* unchanged */
        return;
    }

    memcpy(n->mac, netcfg.mac, ETH_ALEN);
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
}

93 94
/*
 * True when 'status' has DRIVER_OK set, the device reports link up and the
 * VM is running.
 */
static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return false;
    }
    if (!(n->status & VIRTIO_NET_S_LINK_UP)) {
        return false;
    }
    return vdev->vm_running;
}

/*
 * Start or stop vhost so that it matches the desired state for 'status'.
 * Does nothing unless the peer of the first queue is a tap device that has
 * a vhost backend.
 */
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queues = n->multiqueue ? n->max_queues : 1;
    int r;

    if (!nc->peer ||
        nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP ||
        !tap_get_vhost_net(nc->peer)) {
        return;
    }

    /* Already in the requested state and the peer link is up: no change. */
    if (!!n->vhost_started == virtio_net_started(n, status) &&
        !nc->peer->link_down) {
        return;
    }

    if (n->vhost_started) {
        vhost_net_stop(vdev, n->nic->ncs, queues);
        n->vhost_started = 0;
        return;
    }

    if (!vhost_net_query(tap_get_vhost_net(nc->peer), vdev)) {
        return;
    }

    /* Mark as started before the call; undone below if the start fails. */
    n->vhost_started = 1;
    r = vhost_net_start(vdev, n->nic->ncs, queues);
    if (r < 0) {
        error_report("unable to start vhost net: %d: "
                     "falling back on userspace virtio", -r);
        n->vhost_started = 0;
    }
}

139 140
static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
141
    VirtIONet *n = VIRTIO_NET(vdev);
J
Jason Wang 已提交
142 143 144
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;
145 146 147

    virtio_net_vhost_status(n, status);

J
Jason Wang 已提交
148 149
    for (i = 0; i < n->max_queues; i++) {
        q = &n->vqs[i];
150

J
Jason Wang 已提交
151 152
        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
            queue_status = 0;
153
        } else {
J
Jason Wang 已提交
154
            queue_status = status;
155
        }
J
Jason Wang 已提交
156 157 158 159 160 161 162 163 164 165 166 167

        if (!q->tx_waiting) {
            continue;
        }

        if (virtio_net_started(n, queue_status) && !n->vhost_started) {
            if (q->tx_timer) {
                qemu_mod_timer(q->tx_timer,
                               qemu_get_clock_ns(vm_clock) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
168
        } else {
J
Jason Wang 已提交
169 170 171 172 173
            if (q->tx_timer) {
                qemu_del_timer(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
174 175 176 177
        }
    }
}

178
/*
 * Mirror the net client's link state into the device status word, raise a
 * config-change notification if the word changed, and re-apply the device
 * status.
 */
static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    const uint16_t before = n->status;

    if (nc->link_down) {
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    } else {
        n->status |= VIRTIO_NET_S_LINK_UP;
    }

    if (before != n->status) {
        virtio_notify_config(vdev);
    }

    virtio_net_set_status(vdev, vdev->status);
}

195 196
/*
 * Device reset: restore the receive-mode flags, queue count, MAC/VLAN
 * filter tables and MAC address to their initial values.
 */
static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;

    /* multiqueue is disabled by default */
    n->curr_queues = 1;

    /* Flush any MAC filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);

    /* Restore the configured MAC address */
    memcpy(n->mac, &n->nic->conf->macaddr, sizeof(n->mac));

    /* Flush the VLAN filter bitmap (one bit per VLAN id) */
    memset(n->vlans, 0, MAX_VLAN / 8);
}

219
/*
 * Record in n->has_vnet_hdr whether the tap peer of the first queue reports
 * vnet header support.  Left untouched when there is no peer or the peer is
 * not a tap device.
 */
static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *peer = qemu_get_queue(n->nic)->peer;

    if (peer && peer->info->type == NET_CLIENT_OPTIONS_KIND_TAP) {
        n->has_vnet_hdr = tap_has_vnet_hdr(peer);
    }
}
M
Mark McLoughlin 已提交
232

233 234
/* Return the cached has_vnet_hdr flag of the device. */
static int peer_has_vnet_hdr(VirtIONet *n)
{
    int has_hdr = n->has_vnet_hdr;

    return has_hdr;
}

238 239 240 241 242
/*
 * Query the tap peer for UFO support and cache the answer in n->has_ufo.
 * Returns 0 without querying when the peer has no vnet header support.
 */
static int peer_has_ufo(VirtIONet *n)
{
    if (peer_has_vnet_hdr(n)) {
        n->has_ufo = tap_has_ufo(qemu_get_queue(n->nic)->peer);
        return n->has_ufo;
    }

    return 0;
}

248 249
/*
 * Switch mergeable receive buffers on or off and derive the guest header
 * length from that.  For every queue whose tap peer accepts that header
 * length, program it into the peer and use it as the host header length too.
 */
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs)
{
    int i;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (n->mergeable_rx_bufs) {
        n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
    } else {
        n->guest_hdr_len = sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!peer_has_vnet_hdr(n)) {
            continue;
        }
        if (!tap_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            continue;
        }

        tap_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
        n->host_hdr_len = n->guest_hdr_len;
    }
}

J
Jason Wang 已提交
269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311
/*
 * Enable the tap peer of queue 'index'.  Returns the result of tap_enable(),
 * or 0 when the queue has no peer or the peer is not a tap device.
 */
static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *peer = qemu_get_subqueue(n->nic, index)->peer;

    if (peer == NULL || peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
        return 0;
    }

    return tap_enable(peer);
}

/*
 * Disable the tap peer of queue 'index'.  Returns the result of
 * tap_disable(), or 0 when the queue has no peer or the peer is not a tap
 * device.
 */
static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *peer = qemu_get_subqueue(n->nic, index)->peer;

    if (peer == NULL || peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
        return 0;
    }

    return tap_disable(peer);
}

/*
 * Attach the peers of the first curr_queues queues and detach the peers of
 * all remaining queues up to max_queues.
 *
 * The attach/detach calls are made outside of assert() on purpose: an
 * expression inside assert() is not evaluated when NDEBUG is defined, which
 * would silently skip enabling/disabling the backends.
 */
static void virtio_net_set_queues(VirtIONet *n)
{
    int i;
    int r;

    for (i = 0; i < n->max_queues; i++) {
        if (i < n->curr_queues) {
            r = peer_attach(n, i);
        } else {
            r = peer_detach(n, i);
        }
        assert(!r);
    }
}

J
Jason Wang 已提交
312
static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
J
Jason Wang 已提交
313

314
/*
 * Compute the feature bits offered to the guest.  MAC is always offered;
 * checksum/TSO/ECN offloads are removed when the peer has no vnet header
 * support, and UFO is removed when the peer lacks vnet headers or UFO.  If
 * the peer is a tap device with a vhost backend, the result is additionally
 * passed through vhost_net_get_features().
 */
static uint32_t virtio_net_get_features(VirtIODevice *vdev, uint32_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);
    const uint32_t hdr_offloads = (0x1 << VIRTIO_NET_F_CSUM) |
                                  (0x1 << VIRTIO_NET_F_HOST_TSO4) |
                                  (0x1 << VIRTIO_NET_F_HOST_TSO6) |
                                  (0x1 << VIRTIO_NET_F_HOST_ECN) |
                                  (0x1 << VIRTIO_NET_F_GUEST_CSUM) |
                                  (0x1 << VIRTIO_NET_F_GUEST_TSO4) |
                                  (0x1 << VIRTIO_NET_F_GUEST_TSO6) |
                                  (0x1 << VIRTIO_NET_F_GUEST_ECN);
    const uint32_t ufo_offloads = (0x1 << VIRTIO_NET_F_GUEST_UFO) |
                                  (0x1 << VIRTIO_NET_F_HOST_UFO);

    features |= (1 << VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        features &= ~hdr_offloads;
    }

    /* peer_has_ufo() is only consulted when vnet headers are available. */
    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        features &= ~ufo_offloads;
    }

    if (nc->peer &&
        nc->peer->info->type == NET_CLIENT_OPTIONS_KIND_TAP &&
        tap_get_vhost_net(nc->peer)) {
        return vhost_net_get_features(tap_get_vhost_net(nc->peer), features);
    }

    return features;
}

347 348 349 350 351 352 353
/*
 * Feature set returned for the "bad features" query.
 *
 * Linux kernel 2.6.25.  It understood MAC (as everyone must), but also
 * CSUM, HOST_TSO4, HOST_TSO6 and HOST_ECN.
 */
static uint32_t virtio_net_bad_features(VirtIODevice *vdev)
{
    return (1 << VIRTIO_NET_F_MAC) |
           (1 << VIRTIO_NET_F_CSUM) |
           (1 << VIRTIO_NET_F_HOST_TSO4) |
           (1 << VIRTIO_NET_F_HOST_TSO6) |
           (1 << VIRTIO_NET_F_HOST_ECN);
}

A
aliguori 已提交
362 363
/*
 * Apply the feature bits: configure multiqueue and mergeable rx buffers,
 * program the guest offloads into the peer of queue 0 when vnet headers are
 * in use, and acknowledge the features to every vhost backend.
 */
static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    virtio_net_set_multiqueue(n, (features >> VIRTIO_NET_F_MQ) & 1);
    virtio_net_set_mrg_rx_bufs(n, (features >> VIRTIO_NET_F_MRG_RXBUF) & 1);

    if (n->has_vnet_hdr) {
        NetClientState *first = qemu_get_subqueue(n->nic, 0);

        tap_set_offload(first->peer,
                        (features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
                        (features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
                        (features >> VIRTIO_NET_F_GUEST_TSO6) & 1,
                        (features >> VIRTIO_NET_F_GUEST_ECN)  & 1,
                        (features >> VIRTIO_NET_F_GUEST_UFO)  & 1);
    }

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *peer = qemu_get_subqueue(n->nic, i)->peer;

        /* Only tap peers with a vhost backend need the acknowledgement. */
        if (peer &&
            peer->info->type == NET_CLIENT_OPTIONS_KIND_TAP &&
            tap_get_vhost_net(peer)) {
            vhost_net_ack_features(tap_get_vhost_net(peer), features);
        }
    }
}

393
/*
 * Handle a VIRTIO_NET_CTRL_RX command: read the on/off byte from the request
 * and store it in the receive-mode flag selected by 'cmd'.
 *
 * Returns VIRTIO_NET_OK, or VIRTIO_NET_ERR if the payload is too short or
 * the command is unknown.
 */
static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t got = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));

    if (got != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    switch (cmd) {
    case VIRTIO_NET_CTRL_RX_PROMISC:
        n->promisc = on;
        break;
    case VIRTIO_NET_CTRL_RX_ALLMULTI:
        n->allmulti = on;
        break;
    case VIRTIO_NET_CTRL_RX_ALLUNI:
        n->alluni = on;
        break;
    case VIRTIO_NET_CTRL_RX_NOMULTI:
        n->nomulti = on;
        break;
    case VIRTIO_NET_CTRL_RX_NOUNI:
        n->nouni = on;
        break;
    case VIRTIO_NET_CTRL_RX_NOBCAST:
        n->nobcast = on;
        break;
    default:
        return VIRTIO_NET_ERR;
    }

    return VIRTIO_NET_OK;
}

423
/*
 * Handle a VIRTIO_NET_CTRL_MAC command.
 *
 * VIRTIO_NET_CTRL_MAC_ADDR_SET: the payload must be exactly one MAC address,
 * which replaces n->mac.
 *
 * VIRTIO_NET_CTRL_MAC_TABLE_SET: the payload is a unicast list followed by a
 * multicast list, each a 32-bit entry count followed by that many MAC
 * addresses.  The filter table is cleared first.  Unicast entries are stored
 * at the start of the table and multicast entries directly after them
 * (first_multi marks the boundary).  A list that does not fit only sets the
 * corresponding overflow flag.
 *
 * Returns VIRTIO_NET_OK on success and VIRTIO_NET_ERR on an unknown command
 * or a malformed payload.
 */
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    size_t len;

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.uni_overflow = 0;
    n->mac_table.multi_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);

    /* --- unicast list --- */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    if (s != sizeof(mac_data.entries)) {
        return VIRTIO_NET_ERR;
    }
    /* Decode only after the read is known to be complete. */
    mac_data.entries = ldl_p(&mac_data.entries);
    iov_discard_front(&iov, &iov_cnt, s);

    /* Widen before multiplying so a huge count cannot wrap in 32 bits. */
    len = (size_t)mac_data.entries * ETH_ALEN;
    if (len > iov_size(iov, iov_cnt)) {
        return VIRTIO_NET_ERR;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, n->mac_table.macs, len);
        if (s != len) {
            return VIRTIO_NET_ERR;
        }
        n->mac_table.in_use += mac_data.entries;
    } else {
        n->mac_table.uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, len);

    n->mac_table.first_multi = n->mac_table.in_use;

    /* --- multicast list --- */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    if (s != sizeof(mac_data.entries)) {
        return VIRTIO_NET_ERR;
    }
    mac_data.entries = ldl_p(&mac_data.entries);
    iov_discard_front(&iov, &iov_cnt, s);

    len = (size_t)mac_data.entries * ETH_ALEN;
    if (len != iov_size(iov, iov_cnt)) {
        return VIRTIO_NET_ERR;
    }

    /*
     * in_use never exceeds MAC_TABLE_ENTRIES here, so comparing against the
     * remaining capacity cannot wrap (unlike in_use + entries).
     */
    if (mac_data.entries <= MAC_TABLE_ENTRIES - n->mac_table.in_use) {
        /*
         * Append after the unicast entries.  Writing at the start of the
         * table would overwrite the unicast addresses stored above.
         */
        s = iov_to_buf(iov, iov_cnt, 0,
                       &n->mac_table.macs[n->mac_table.in_use * ETH_ALEN],
                       len);
        if (s != len) {
            return VIRTIO_NET_ERR;
        }
        n->mac_table.in_use += mac_data.entries;
    } else {
        n->mac_table.multi_overflow = 1;
    }

    return VIRTIO_NET_OK;
}

503
/*
 * Handle a VIRTIO_NET_CTRL_VLAN command: read a 16-bit VLAN id from the
 * request and set (ADD) or clear (DEL) its bit in the n->vlans bitmap.
 *
 * Returns VIRTIO_NET_OK, or VIRTIO_NET_ERR if the payload is too short, the
 * id is not below MAX_VLAN, or the command is unknown.
 */
static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    uint16_t vid;
    size_t s;

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }
    /* Decode only once the full value has been read from the request. */
    vid = lduw_p(&vid);

    if (vid >= MAX_VLAN) {
        return VIRTIO_NET_ERR;
    }

    /* 32 VLAN ids per bitmap word: word index vid >> 5, bit vid & 0x1f. */
    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) {
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    } else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) {
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    } else {
        return VIRTIO_NET_ERR;
    }

    return VIRTIO_NET_OK;
}

J
Jason Wang 已提交
528
/*
 * Handle a VIRTIO_NET_CTRL_MQ command.  Only VQ_PAIRS_SET is accepted: the
 * requested number of queue pairs becomes curr_queues, after which the
 * device status and the backend queue attachments are refreshed.
 *
 * Returns VIRTIO_NET_OK, or VIRTIO_NET_ERR on a short payload, an unknown
 * command, an out-of-range count, or when multiqueue is not enabled.
 */
static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mq mq;
    uint16_t queues;
    bool in_range;

    if (iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq)) != sizeof(mq)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        return VIRTIO_NET_ERR;
    }

    queues = lduw_p(&mq.virtqueue_pairs);

    in_range = queues >= VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN &&
               queues <= VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX &&
               queues <= n->max_queues;
    if (!in_range || !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queues = queues;
    /* stop the backend before changing the number of queues to avoid handling a
     * disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queues(n);

    return VIRTIO_NET_OK;
}
562 563
/*
 * Process all pending requests on the control virtqueue.  Each request
 * starts with a virtio_net_ctrl_hdr; the remaining "out" data is passed to
 * the handler for the header's class.  The resulting ack status is written
 * to the request's "in" buffers and the element is returned to the guest.
 *
 * A request whose class is not recognised is answered with VIRTIO_NET_ERR.
 * Exits the process if a request has no room for the header or the ack.
 */
static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status;
    VirtQueueElement elem;
    size_t s;
    struct iovec *iov;
    unsigned int iov_cnt;

    while (virtqueue_pop(vq, &elem)) {
        /*
         * Start every request from the error state.  Otherwise a request
         * with an unknown class would be acknowledged with whatever status
         * the previous request in this batch produced.
         */
        status = VIRTIO_NET_ERR;

        if (iov_size(elem.in_sg, elem.in_num) < sizeof(status) ||
            iov_size(elem.out_sg, elem.out_num) < sizeof(ctrl)) {
            error_report("virtio-net ctrl missing headers");
            exit(1);
        }

        iov = elem.out_sg;
        iov_cnt = elem.out_num;
        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
        /* Handlers see only the command payload that follows the header. */
        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
        if (s != sizeof(ctrl)) {
            status = VIRTIO_NET_ERR;
        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
        }

        s = iov_from_buf(elem.in_sg, elem.in_num, 0, &status, sizeof(status));
        assert(s == sizeof(status));

        virtqueue_push(vq, &elem, sizeof(status));
        virtio_notify(vdev, vq);
    }
}

A
aliguori 已提交
603 604 605 606
/* RX */

/*
 * Receive virtqueue handler: flush the packets queued for the net client
 * that corresponds to the virtqueue.
 */
static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int pair = vq2q(virtio_get_queue_index(vq));
    NetClientState *nc = qemu_get_subqueue(n->nic, pair);

    qemu_flush_queued_packets(nc);
}

613
/*
 * Return 1 if the queue behind 'nc' may receive a packet right now: the VM
 * is running, the queue is one of the currently enabled ones, its receive
 * virtqueue is ready and the driver has set DRIVER_OK.  Return 0 otherwise.
 */
static int virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running || nc->queue_index >= n->curr_queues) {
        return 0;
    }

    return virtio_queue_ready(q->rx_vq) &&
           (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
}

635
/*
 * True when the receive queue cannot take a packet of 'bufsize' bytes: the
 * queue is empty, or mergeable buffers are in use and fewer than 'bufsize'
 * bytes are available.
 */
static int virtio_net_rx_lacks_space(VirtIONetQueue *q, int bufsize)
{
    if (virtio_queue_empty(q->rx_vq)) {
        return 1;
    }

    return q->n->mergeable_rx_bufs &&
           !virtqueue_avail_bytes(q->rx_vq, bufsize, 0);
}

/*
 * Return 1 if the receive queue has room for 'bufsize' bytes, with guest
 * notifications switched off; return 0 with notifications left enabled so
 * that the guest signals when it adds buffers.
 */
static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    if (virtio_net_rx_lacks_space(q, bufsize)) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_net_rx_lacks_space(q, bufsize)) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}

A
Anthony Liguori 已提交
658 659 660 661 662 663 664 665 666 667 668 669 670 671 672
/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
M
Michael S. Tsirkin 已提交
673
                                        uint8_t *buf, size_t size)
A
Anthony Liguori 已提交
674 675 676 677 678 679
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
M
Michael S. Tsirkin 已提交
680
        net_checksum_calculate(buf, size);
A
Anthony Liguori 已提交
681 682 683 684
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}

685 686
/*
 * Write the virtio-net header for an incoming packet into the guest buffers
 * described by 'iov'.  With a vnet header from the host, that header (at the
 * start of 'buf') is copied after the dhclient checksum workaround has been
 * applied; otherwise a header with no flags and no GSO is supplied.
 */
static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    struct virtio_net_hdr blank = {
        .flags = 0,
        .gso_type = VIRTIO_NET_HDR_GSO_NONE
    };
    uint8_t *raw;

    if (!n->has_vnet_hdr) {
        iov_from_buf(iov, iov_cnt, 0, &blank, sizeof(blank));
        return;
    }

    /* FIXME this cast is evil */
    raw = (uint8_t *)buf;
    work_around_broken_dhclient((struct virtio_net_hdr *)raw,
                                raw + n->host_hdr_len,
                                size - n->host_hdr_len);
    iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
}

703 704 705
/*
 * Decide whether an incoming packet passes the receive filters.  Returns 1
 * to accept and 0 to drop.  In promiscuous mode everything is accepted.
 * Otherwise VLAN-tagged frames must have their id set in the VLAN bitmap,
 * and the destination address is checked against the broadcast/multicast/
 * unicast mode flags, the device MAC and the MAC filter table.
 */
static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *ptr = (uint8_t *)buf;
    int first, end, i;

    if (n->promisc) {
        return 1;
    }

    /* Skip the host header to reach the Ethernet frame. */
    ptr += n->host_hdr_len;

    if (memcmp(&ptr[12], vlan, sizeof(vlan)) == 0) {
        int vid = be16_to_cpup((uint16_t *)(ptr + 14)) & 0xfff;

        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f)))) {
            return 0;
        }
    }

    if (ptr[0] & 1) {
        /* multicast (including broadcast) */
        if (memcmp(ptr, bcast, sizeof(bcast)) == 0) {
            return !n->nobcast;
        }
        if (n->nomulti) {
            return 0;
        }
        if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }
        first = n->mac_table.first_multi;
        end = n->mac_table.in_use;
    } else {
        /* unicast */
        if (n->nouni) {
            return 0;
        }
        if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        }
        if (memcmp(ptr, n->mac, ETH_ALEN) == 0) {
            return 1;
        }
        first = 0;
        end = n->mac_table.first_multi;
    }

    /* Look the destination up in the relevant part of the filter table. */
    for (i = first; i < end; i++) {
        if (memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN) == 0) {
            return 1;
        }
    }

    return 0;
}

754
/*
 * Deliver one packet from the backend to the guest's receive virtqueue.
 *
 * Returns -1 if the queue cannot receive now (or no element could be popped
 * for the first buffer), 0 if there is not enough buffer space, and 'size'
 * once the packet has been delivered, filtered out, or dropped because it
 * did not fit a single non-mergeable buffer.  Exits the process if the
 * queue runs empty in the middle of a packet or an element has no "in"
 * buffers.
 */
static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset = 0;
    size_t nbufs = 0;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size)) {
        return size;
    }

    while (offset < size) {
        VirtQueueElement elem;
        const struct iovec *sg = elem.in_sg;
        size_t guest_offset = 0;
        int total = 0;
        int len;

        if (virtqueue_pop(q->rx_vq, &elem) == 0) {
            if (nbufs == 0) {
                return -1;
            }
            error_report("virtio-net unexpected empty queue: "
                    "i %zd mergeable %d offset %zd, size %zd, "
                    "guest hdr len %zd, host hdr len %zd guest features 0x%x",
                    nbufs, n->mergeable_rx_bufs, offset, size,
                    n->guest_hdr_len, n->host_hdr_len, vdev->guest_features);
            exit(1);
        }

        if (elem.in_num < 1) {
            error_report("virtio-net receive queue contains no in buffers");
            exit(1);
        }

        if (nbufs == 0) {
            /* First buffer: it carries the virtio-net header. */
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                /* Remember where num_buffers lives; it is filled in last. */
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem.in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem.in_num, buf, size);
            offset = n->host_hdr_len;
            total = n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem.in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;

        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            return size;
        }

        /* signal other side */
        virtqueue_fill(q->rx_vq, &elem, total, nbufs);
        nbufs++;
    }

    if (mhdr_cnt) {
        stw_p(&mhdr.num_buffers, nbufs);
        iov_from_buf(mhdr_sg, mhdr_cnt, 0,
                     &mhdr.num_buffers, sizeof(mhdr.num_buffers));
    }

    virtqueue_flush(q->rx_vq, nbufs);
    virtio_notify(vdev, q->rx_vq);

    return size;
}

854
static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
855

856
/*
 * Completion callback for an asynchronous transmit: return the pending
 * element to the guest, clear the async-tx bookkeeping, re-enable tx
 * notifications and flush further pending transmissions.
 */
static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    virtqueue_push(q->tx_vq, &q->async_tx.elem, 0);
    virtio_notify(vdev, q->tx_vq);

    /* Nothing is in flight any more. */
    q->async_tx.len = 0;
    q->async_tx.elem.out_num = 0;

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}

A
aliguori 已提交
871
/* TX */
872
/*
 * Pop elements from the TX virtqueue of 'q' and hand them to the network
 * backend.
 *
 * Returns:
 *   - the number of packets sent synchronously (stops once n->tx_burst is
 *     reached);
 *   - 0 if the driver has not set DRIVER_OK, or if an earlier asynchronous
 *     send is still outstanding;
 *   - -EBUSY if qemu_sendv_packet_async() returned 0 for a packet; the
 *     element is then parked in q->async_tx until virtio_net_tx_complete()
 *     runs.
 */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement elem;
    int32_t sent = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));

    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return sent;
    }

    assert(vdev->vm_running);

    /* An element is still parked in async_tx: keep notifications off and
     * wait for the completion callback. */
    if (q->async_tx.elem.out_num) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return sent;
    }

    while (virtqueue_pop(q->tx_vq, &elem)) {
        struct iovec stripped[VIRTQUEUE_MAX_SIZE];
        struct iovec *out_sg = elem.out_sg;
        unsigned int out_num = elem.out_num;
        ssize_t ret, len;

        if (out_num < 1) {
            error_report("virtio-net header not in first element");
            exit(1);
        }

        /*
         * When host and guest header sizes match, the guest buffers are
         * forwarded untouched.  Otherwise build a second iovec holding the
         * first host_hdr_len bytes followed by everything after the guest
         * header.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            unsigned copied;

            copied = iov_copy(stripped, ARRAY_SIZE(stripped),
                              out_sg, out_num,
                              0, n->host_hdr_len);
            copied += iov_copy(stripped + copied,
                               ARRAY_SIZE(stripped) - copied,
                               out_sg, out_num,
                               n->guest_hdr_len, -1);
            out_sg = stripped;
            out_num = copied;
        }

        len = n->guest_hdr_len;

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            /* Park the element; virtio_net_tx_complete() pushes it later. */
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            q->async_tx.len  = len;
            return -EBUSY;
        }

        len += ret;

        virtqueue_push(q->tx_vq, &elem, 0);
        virtio_notify(vdev, q->tx_vq);

        sent++;
        if (sent >= n->tx_burst) {
            break;
        }
    }

    return sent;
}

941
/*
 * TX virtqueue handler used in "timer" mode.
 *
 * The first kick arms the per-queue timer and disables further
 * notifications; a kick that arrives while the timer is already pending
 * cancels the timer and flushes immediately.
 */
static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        q->tx_waiting = 1;
        return;
    }

    if (!q->tx_waiting) {
        /* Nothing pending yet: batch by deferring the flush to the timer. */
        qemu_mod_timer(q->tx_timer,
                       qemu_get_clock_ns(vm_clock) + n->tx_timeout);
        q->tx_waiting = 1;
        virtio_queue_set_notification(vq, 0);
        return;
    }

    /* Timer already armed: cancel it and flush right away. */
    virtio_queue_set_notification(vq, 1);
    qemu_del_timer(q->tx_timer);
    q->tx_waiting = 0;
    virtio_net_flush_tx(q);
}

965 966
/*
 * TX virtqueue handler used in "bh" mode.
 *
 * Marks the queue as waiting and, if the VM is running, disables further
 * notifications and schedules the bottom half that performs the flush.
 * Does nothing if a flush is already pending.
 */
static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely(q->tx_waiting)) {
        return;
    }
    q->tx_waiting = 1;

    /* When the device was stopped but the VCPU wasn't, only record the
     * pending state; do not schedule anything. */
    if (vdev->vm_running) {
        virtio_queue_set_notification(vq, 0);
        qemu_bh_schedule(q->tx_bh);
    }
}

A
aliguori 已提交
982 983
static void virtio_net_tx_timer(void *opaque)
{
984 985
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
986 987
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    assert(vdev->vm_running);
A
aliguori 已提交
988

989
    q->tx_waiting = 0;
A
aliguori 已提交
990 991

    /* Just in case the driver is not ready on more */
992
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
A
aliguori 已提交
993
        return;
994
    }
A
aliguori 已提交
995

996 997
    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
A
aliguori 已提交
998 999
}

1000 1001
static void virtio_net_tx_bh(void *opaque)
{
1002 1003
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
1004
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1005 1006
    int32_t ret;

1007
    assert(vdev->vm_running);
1008

1009
    q->tx_waiting = 0;
1010 1011

    /* Just in case the driver is not ready on more */
1012
    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
1013
        return;
1014
    }
1015

1016
    ret = virtio_net_flush_tx(q);
1017 1018 1019 1020 1021 1022 1023
    if (ret == -EBUSY) {
        return; /* Notification re-enable handled by tx_complete */
    }

    /* If we flush a full burst of packets, assume there are
     * more coming and immediately reschedule */
    if (ret >= n->tx_burst) {
1024 1025
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
1026 1027 1028 1029 1030 1031
        return;
    }

    /* If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking.  If
     * we find something, assume the guest is still active and reschedule */
1032 1033 1034 1035 1036
    virtio_queue_set_notification(q->tx_vq, 1);
    if (virtio_net_flush_tx(q) > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
1037 1038 1039
    }
}

J
Jason Wang 已提交
1040
/*
 * Switch the device between single-queue and multiqueue layout.
 *
 * Deletes every virtqueue from index 2 upward (this includes the control
 * queue), then re-adds RX/TX pairs 1..max-1 and finally the control queue,
 * so that the control queue is always the last one.
 *
 * The TX mode of the extra queues follows queue 0: virtio_net_device_init()
 * only ever sets vqs[0].tx_timer or vqs[0].tx_bh, and vqs[] is
 * zero-allocated, so testing vqs[i].tx_timer here would always pick "bh"
 * for the extra queues regardless of the configured mode.
 *
 * NOTE(review): timers/bottom halves created by an earlier call are not
 * released before being overwritten here - confirm whether callers can
 * invoke this more than once with multiqueue enabled.
 */
static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int i, max = multiqueue ? n->max_queues : 1;

    n->multiqueue = multiqueue;

    for (i = 2; i <= n->max_queues * 2 + 1; i++) {
        virtio_del_queue(vdev, i);
    }

    for (i = 1; i < max; i++) {
        n->vqs[i].rx_vq = virtio_add_queue(vdev, 256, virtio_net_handle_rx);
        if (n->vqs[0].tx_timer) {
            n->vqs[i].tx_vq =
                virtio_add_queue(vdev, 256, virtio_net_handle_tx_timer);
            n->vqs[i].tx_timer = qemu_new_timer_ns(vm_clock,
                                                   virtio_net_tx_timer,
                                                   &n->vqs[i]);
        } else {
            n->vqs[i].tx_vq =
                virtio_add_queue(vdev, 256, virtio_net_handle_tx_bh);
            n->vqs[i].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[i]);
        }

        n->vqs[i].tx_waiting = 0;
        n->vqs[i].n = n;
    }

    /* Note: Minix Guests (version 3.2.1) use ctrl vq but don't ack
     * VIRTIO_NET_F_CTRL_VQ. Create ctrl vq unconditionally to avoid
     * breaking them.
     */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);

    virtio_net_set_queues(n);
}

A
aliguori 已提交
1078 1079
/*
 * Serialize the device state to the migration stream 'f'.
 *
 * The field order below is the wire format that virtio_net_load() reads
 * back; it must not be changed.
 */
static void virtio_net_save(QEMUFile *f, void *opaque)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int qi;

    /* At this point, backend must be stopped, otherwise
     * it might keep writing to memory. */
    assert(!n->vhost_started);

    /* Generic virtio state first, then the net-specific fields. */
    virtio_save(vdev, f);

    qemu_put_buffer(f, n->mac, ETH_ALEN);
    qemu_put_be32(f, n->vqs[0].tx_waiting);
    qemu_put_be32(f, n->mergeable_rx_bufs);
    qemu_put_be16(f, n->status);

    /* RX filter state */
    qemu_put_byte(f, n->promisc);
    qemu_put_byte(f, n->allmulti);
    qemu_put_be32(f, n->mac_table.in_use);
    qemu_put_buffer(f, n->mac_table.macs, n->mac_table.in_use * ETH_ALEN);
    qemu_put_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);

    qemu_put_be32(f, n->has_vnet_hdr);

    qemu_put_byte(f, n->mac_table.multi_overflow);
    qemu_put_byte(f, n->mac_table.uni_overflow);
    qemu_put_byte(f, n->alluni);
    qemu_put_byte(f, n->nomulti);
    qemu_put_byte(f, n->nouni);
    qemu_put_byte(f, n->nobcast);

    qemu_put_byte(f, n->has_ufo);

    /* The multiqueue section is only written for multiqueue devices. */
    if (n->max_queues <= 1) {
        return;
    }

    qemu_put_be16(f, n->max_queues);
    qemu_put_be16(f, n->curr_queues);
    for (qi = 1; qi < n->curr_queues; qi++) {
        qemu_put_be32(f, n->vqs[qi].tx_waiting);
    }
}

/*
 * Restore the device state from the migration stream 'f'.
 *
 * Reads the fields in the order written by virtio_net_save(), honouring
 * which fields exist for the given 'version_id'.
 *
 * Returns 0 on success, -EINVAL for an unsupported version, the error
 * from virtio_load() if that fails, or -1 when the saved image is
 * incompatible with this device/backend.
 *
 * Values taken from the stream are validated before they are used as
 * sizes or array bounds.
 */
static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int ret, i, link_down;

    if (version_id < 2 || version_id > VIRTIO_NET_VM_VERSION) {
        return -EINVAL;
    }

    ret = virtio_load(vdev, f);
    if (ret) {
        return ret;
    }

    qemu_get_buffer(f, n->mac, ETH_ALEN);
    n->vqs[0].tx_waiting = qemu_get_be32(f);

    virtio_net_set_mrg_rx_bufs(n, qemu_get_be32(f));

    if (version_id >= 3) {
        n->status = qemu_get_be16(f);
    }

    if (version_id >= 4) {
        /* Versions before 8 stored these flags as 32-bit values. */
        if (version_id < 8) {
            n->promisc = qemu_get_be32(f);
            n->allmulti = qemu_get_be32(f);
        } else {
            n->promisc = qemu_get_byte(f);
            n->allmulti = qemu_get_byte(f);
        }
    }

    if (version_id >= 5) {
        /* Keep the count unsigned so a value with the top bit set cannot
         * slip past the bound check as a negative number. */
        uint32_t in_use = qemu_get_be32(f);

        /* MAC_TABLE_ENTRIES may be different from the saved image */
        if (in_use <= MAC_TABLE_ENTRIES) {
            n->mac_table.in_use = in_use;
            qemu_get_buffer(f, n->mac_table.macs, in_use * ETH_ALEN);
        } else {
            /*
             * The saved table does not fit.  Only the overflow flags are
             * needed in that case, so consume and discard the entries
             * byte by byte instead of buffering them.  64-bit arithmetic
             * keeps in_use * ETH_ALEN from wrapping.
             */
            int64_t remaining = (int64_t)in_use * ETH_ALEN;

            while (remaining-- > 0) {
                qemu_get_byte(f);
            }
            n->mac_table.multi_overflow = n->mac_table.uni_overflow = 1;
            n->mac_table.in_use = 0;
        }
    }

    if (version_id >= 6) {
        qemu_get_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);
    }

    if (version_id >= 7) {
        if (qemu_get_be32(f) && !peer_has_vnet_hdr(n)) {
            error_report("virtio-net: saved image requires vnet_hdr=on");
            return -1;
        }

        if (n->has_vnet_hdr) {
            tap_set_offload(qemu_get_queue(n->nic)->peer,
                    (vdev->guest_features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
                    (vdev->guest_features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
                    (vdev->guest_features >> VIRTIO_NET_F_GUEST_TSO6) & 1,
                    (vdev->guest_features >> VIRTIO_NET_F_GUEST_ECN)  & 1,
                    (vdev->guest_features >> VIRTIO_NET_F_GUEST_UFO)  & 1);
        }
    }

    if (version_id >= 9) {
        n->mac_table.multi_overflow = qemu_get_byte(f);
        n->mac_table.uni_overflow = qemu_get_byte(f);
    }

    if (version_id >= 10) {
        n->alluni = qemu_get_byte(f);
        n->nomulti = qemu_get_byte(f);
        n->nouni = qemu_get_byte(f);
        n->nobcast = qemu_get_byte(f);
    }

    if (version_id >= 11) {
        if (qemu_get_byte(f) && !peer_has_ufo(n)) {
            error_report("virtio-net: saved image requires TUN_F_UFO support");
            return -1;
        }
    }

    if (n->max_queues > 1) {
        if (n->max_queues != qemu_get_be16(f)) {
            error_report("virtio-net: different max_queues ");
            return -1;
        }

        n->curr_queues = qemu_get_be16(f);
        /* n->vqs has exactly max_queues entries; refuse a count that
         * would index past it below. */
        if (n->curr_queues > n->max_queues) {
            error_report("virtio-net: curr_queues %x > max_queues %x",
                         n->curr_queues, n->max_queues);
            return -1;
        }
        for (i = 1; i < n->curr_queues; i++) {
            n->vqs[i].tx_waiting = qemu_get_be32(f);
        }
    }

    virtio_net_set_queues(n);

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in n->status */
    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
    for (i = 0; i < n->max_queues; i++) {
        qemu_get_subqueue(n->nic, i)->link_down = link_down;
    }

    return 0;
}

1232
/*
 * NetClientInfo.cleanup hook: drop the device's reference to its NIC.
 */
static void virtio_net_cleanup(NetClientState *nc)
{
    VirtIONet *net = qemu_get_nic_opaque(nc);

    net->nic = NULL;
}

M
Mark McLoughlin 已提交
1239
static NetClientInfo net_virtio_info = {
1240
    .type = NET_CLIENT_OPTIONS_KIND_NIC,
M
Mark McLoughlin 已提交
1241 1242 1243 1244 1245 1246 1247
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
        .cleanup = virtio_net_cleanup,
    .link_status_changed = virtio_net_set_link_status,
};

1248 1249
/*
 * VirtioDeviceClass.guest_notifier_pending hook.
 *
 * Forwards the query for virtqueue 'idx' to the vhost-net instance behind
 * the subqueue that owns it.  Only valid while vhost is running.
 */
static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
    VirtIONet *net = VIRTIO_NET(vdev);
    NetClientState *subq = qemu_get_subqueue(net->nic, vq2q(idx));

    assert(net->vhost_started);

    return vhost_net_virtqueue_pending(tap_get_vhost_net(subq->peer), idx);
}

/*
 * VirtioDeviceClass.guest_notifier_mask hook.
 *
 * Forwards the mask request for virtqueue 'idx' to the vhost-net instance
 * behind the subqueue that owns it.  Only valid while vhost is running.
 */
static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
                                           bool mask)
{
    VirtIONet *net = VIRTIO_NET(vdev);
    NetClientState *subq = qemu_get_subqueue(net->nic, vq2q(idx));

    assert(net->vhost_started);

    vhost_net_virtqueue_mask(tap_get_vhost_net(subq->peer), vdev, idx, mask);
}

1266
/*
 * Set n->config_size to the largest 'end' offset among the feature_sizes
 * entries whose flag is present in 'host_features' (0 if none match).
 */
void virtio_net_set_config_size(VirtIONet *n, uint32_t host_features)
{
    const VirtIOFeature *feat;
    int config_size = 0;

    /* feature_sizes[] is terminated by an entry with flags == 0 */
    for (feat = feature_sizes; feat->flags != 0; feat++) {
        if (host_features & feat->flags) {
            config_size = MAX(feat->end, config_size);
        }
    }

    n->config_size = config_size;
}

1277
/*
 * VirtioDeviceClass.init hook: set up a virtio-net device.
 *
 * Creates the RX/TX virtqueues for queue pair 0 and the control queue,
 * picks the TX mitigation mode from the "tx" property, creates the NIC,
 * probes the peer for vnet header support, allocates the MAC and VLAN
 * filter tables and registers the savevm handlers.  Always returns 0.
 */
static int virtio_net_device_init(VirtIODevice *vdev)
{
    DeviceState *qdev = DEVICE(vdev);
    VirtIONet *n = VIRTIO_NET(vdev);
    const char *tx_mode;
    bool use_timer;
    int i;

    virtio_init(VIRTIO_DEVICE(n), "virtio-net", VIRTIO_ID_NET,
                                  n->config_size);

    n->max_queues = MAX(n->nic_conf.queues, 1);
    n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
    n->vqs[0].rx_vq = virtio_add_queue(vdev, 256, virtio_net_handle_rx);
    n->curr_queues = 1;
    n->vqs[0].n = n;
    n->tx_timeout = n->net_conf.txtimer;

    /* Anything other than "timer" (including an unknown value) means "bh". */
    tx_mode = n->net_conf.tx;
    use_timer = tx_mode && !strcmp(tx_mode, "timer");
    if (tx_mode && !use_timer && strcmp(tx_mode, "bh")) {
        error_report("virtio-net: "
                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                     n->net_conf.tx);
        error_report("Defaulting to \"bh\"");
    }

    if (use_timer) {
        n->vqs[0].tx_vq = virtio_add_queue(vdev, 256,
                                           virtio_net_handle_tx_timer);
        n->vqs[0].tx_timer = qemu_new_timer_ns(vm_clock, virtio_net_tx_timer,
                                               &n->vqs[0]);
    } else {
        n->vqs[0].tx_vq = virtio_add_queue(vdev, 256,
                                           virtio_net_handle_tx_bh);
        n->vqs[0].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[0]);
    }

    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);

    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;

    n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                          object_get_typename(OBJECT(qdev)), qdev->id, n);

    /* If the peer supports vnet headers, enable them on every subqueue. */
    peer_test_vnet_hdr(n);
    if (!peer_has_vnet_hdr(n)) {
        n->host_hdr_len = 0;
    } else {
        for (i = 0; i < n->max_queues; i++) {
            tap_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
        }
        n->host_hdr_len = sizeof(struct virtio_net_hdr);
    }

    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);

    n->vqs[0].tx_waiting = 0;
    n->tx_burst = n->net_conf.txburst;
    virtio_net_set_mrg_rx_bufs(n, 0);
    n->promisc = 1; /* for compatibility */

    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    /* One bit per VLAN id */
    n->vlans = g_malloc0(MAX_VLAN >> 3);

    n->qdev = qdev;
    register_savevm(qdev, "virtio-net", -1, VIRTIO_NET_VM_VERSION,
                    virtio_net_save, virtio_net_load, n);

    add_boot_device_path(n->nic_conf.bootindex, qdev, "/ethernet-phy@0");

    return 0;
}

/*
 * DeviceClass.exit hook: tear down a virtio-net device.
 *
 * Stops the backend, unregisters the savevm handlers, frees the filter
 * tables, purges queued packets and releases the per-queue TX timer or
 * bottom half, then deletes the NIC and the virtio device state.
 * Always returns 0.
 */
static int virtio_net_device_exit(DeviceState *qdev)
{
    VirtIONet *n = VIRTIO_NET(qdev);
    VirtIODevice *vdev = VIRTIO_DEVICE(qdev);
    int i;

    /* This will stop vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    unregister_savevm(qdev, "virtio-net", n);

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    for (i = 0; i < n->max_queues; i++) {
        VirtIONetQueue *q = &n->vqs[i];
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        qemu_purge_queued_packets(nc);

        /*
         * Queues beyond the first only get a timer or bottom half once
         * virtio_net_set_multiqueue() has set them up; until then both
         * pointers are still NULL from g_malloc0(), so neither may be
         * passed to the delete functions.
         */
        if (q->tx_timer) {
            qemu_del_timer(q->tx_timer);
            qemu_free_timer(q->tx_timer);
        } else if (q->tx_bh) {
            qemu_bh_delete(q->tx_bh);
        }
    }

    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_cleanup(vdev);

    return 0;
}

/*
 * QOM instance initializer.
 *
 * Starts with the full config size, sizeof(struct virtio_net_config);
 * it can be overridden later with virtio_net_set_config_size().
 */
static void virtio_net_instance_init(Object *obj)
{
    VirtIONet *net = VIRTIO_NET(obj);

    net->config_size = sizeof(struct virtio_net_config);
}

/*
 * qdev properties of the virtio-net device.
 *
 *   x-txtimer  copied into tx_timeout by virtio_net_device_init();
 *              used as the delay when arming the TX timer
 *   x-txburst  copied into tx_burst; upper bound on packets sent per
 *              virtio_net_flush_tx() call
 *   tx         TX mitigation mode: "timer" or "bh" (any other value falls
 *              back to "bh" with a warning)
 */
static Property virtio_net_properties[] = {
    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
                                               TX_TIMER_INTERVAL),
    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
    DEFINE_PROP_END_OF_LIST(),
};

/*
 * QOM class initializer: install the qdev and virtio callbacks that
 * implement the virtio-net device.  'data' is not used.
 */
static void virtio_net_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *device_class = DEVICE_CLASS(klass);
    VirtioDeviceClass *virtio_class = VIRTIO_DEVICE_CLASS(klass);

    /* Generic device hooks */
    device_class->exit = virtio_net_device_exit;
    device_class->props = virtio_net_properties;

    /* Lifecycle */
    virtio_class->init = virtio_net_device_init;

    /* Config space */
    virtio_class->get_config = virtio_net_get_config;
    virtio_class->set_config = virtio_net_set_config;

    /* Feature negotiation */
    virtio_class->get_features = virtio_net_get_features;
    virtio_class->set_features = virtio_net_set_features;
    virtio_class->bad_features = virtio_net_bad_features;

    /* Reset and status */
    virtio_class->reset = virtio_net_reset;
    virtio_class->set_status = virtio_net_set_status;

    /* Guest notifier hooks */
    virtio_class->guest_notifier_mask = virtio_net_guest_notifier_mask;
    virtio_class->guest_notifier_pending = virtio_net_guest_notifier_pending;
}

/* QOM type description for the virtio-net device. */
static const TypeInfo virtio_net_info = {
    .name          = TYPE_VIRTIO_NET,
    .parent        = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIONet),
    .instance_init = virtio_net_instance_init,
    .class_init    = virtio_net_class_init,
};

/* Register the virtio-net QOM type described by virtio_net_info. */
static void virtio_register_types(void)
{
    type_register_static(&virtio_net_info);
}

/* Hook the registration function into QEMU's type initialization. */
type_init(virtio_register_types)