virtio-net.c 41.7 KB
Newer Older
A
aliguori 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

14
#include "qemu/iov.h"
P
Paolo Bonzini 已提交
15
#include "hw/virtio/virtio.h"
P
Paolo Bonzini 已提交
16
#include "net/net.h"
17
#include "net/checksum.h"
18
#include "net/tap.h"
19 20
#include "qemu/error-report.h"
#include "qemu/timer.h"
P
Paolo Bonzini 已提交
21 22
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
23
#include "hw/virtio/virtio-bus.h"
A
aliguori 已提交
24

25
#define VIRTIO_NET_VM_VERSION    11
26

27
#define MAC_TABLE_ENTRIES    64
28
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
29

30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
/*
 * Calculate the number of bytes up to and including the given 'field' of
 * 'container', i.e. the offset of the field plus the size of the field.
 *
 * 'container' must be a type name and 'field' one of its members.  The
 * null pointer cast only appears as the operand of sizeof, so it is used
 * for its type and never dereferenced.
 */
#define endof(container, field) \
    (offsetof(container, field) + sizeof(((container *)0)->field))

/*
 * One row of a feature-to-size table: pairs a feature bit mask with an
 * endof() byte count (see feature_sizes below).
 */
typedef struct VirtIOFeature {
    uint32_t flags; /* feature bit mask, built as 1 << VIRTIO_NET_F_* */
    size_t end;     /* byte count up to and including a config field */
} VirtIOFeature;

/*
 * For each listed feature bit, the number of bytes of
 * struct virtio_net_config up to and including the field named beside it.
 * The table ends with an all-zero entry.
 */
static VirtIOFeature feature_sizes[] = {
    {
        .flags = 1 << VIRTIO_NET_F_MAC,
        .end = endof(struct virtio_net_config, mac),
    },
    {
        .flags = 1 << VIRTIO_NET_F_STATUS,
        .end = endof(struct virtio_net_config, status),
    },
    {
        .flags = 1 << VIRTIO_NET_F_MQ,
        .end = endof(struct virtio_net_config, max_virtqueue_pairs),
    },
    { .flags = 0, .end = 0 }, /* terminator */
};

J
Jason Wang 已提交
52
/*
 * Return the per-queue state of the device that owns 'nc', selected by the
 * client's queue_index.
 */
static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *dev = qemu_get_nic_opaque(nc);

    return dev->vqs + nc->queue_index;
}
J
Jason Wang 已提交
58 59 60 61 62 63

/*
 * Convert a virtqueue index into a queue index: every two consecutive
 * virtqueue indices map onto the same queue index.
 */
static int vq2q(int queue_index)
{
    const int vqs_per_queue = 2;

    return queue_index / vqs_per_queue;
}

A
aliguori 已提交
64 65 66 67
/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

68
/*
 * Fill 'config' with the device configuration: link status, maximum
 * number of queues and MAC address, truncated to n->config_size bytes.
 *
 * @vdev:   the virtio device (a VirtIONet)
 * @config: destination buffer; n->config_size bytes are written to it
 */
static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg;

    /*
     * Start from an all-zero structure so that no uninitialised stack
     * bytes (padding, or members not assigned below) are copied out.
     */
    memset(&netcfg, 0, sizeof(netcfg));

    stw_p(&netcfg.status, n->status);
    stw_p(&netcfg.max_virtqueue_pairs, n->max_queues);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    memcpy(config, &netcfg, n->config_size);
}

79 80
/*
 * Accept a configuration write.  Only the MAC address is taken over, and
 * only when the VIRTIO_NET_F_CTRL_MAC_ADDR bit is clear in guest_features
 * and the written address differs from the current one.
 */
static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config incoming = {};

    memcpy(&incoming, config, n->config_size);

    if ((vdev->guest_features >> VIRTIO_NET_F_CTRL_MAC_ADDR) & 1) {
        return;
    }

    if (memcmp(incoming.mac, n->mac, ETH_ALEN) == 0) {
        /* Same address as before, nothing to update */
        return;
    }

    memcpy(n->mac, incoming.mac, ETH_ALEN);
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
}

93 94
/*
 * Tell whether the device counts as started for the given virtio 'status':
 * DRIVER_OK must be set in 'status', the link must be up in n->status and
 * the VM must be running.
 */
static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return false;
    }
    if (!(n->status & VIRTIO_NET_S_LINK_UP)) {
        return false;
    }
    return vdev->vm_running ? true : false;
}

/*
 * Start or stop vhost-net so that n->vhost_started follows what
 * virtio_net_started() reports for 'status'.  Does nothing unless the peer
 * is a tap client that has a vhost net attached.
 */
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queues;
    int err;

    if (n->multiqueue) {
        queues = n->max_queues;
    } else {
        queues = 1;
    }

    if (!nc->peer ||
        nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP ||
        !tap_get_vhost_net(nc->peer)) {
        return;
    }

    /* Already in the wanted state and the peer link is up: nothing to do */
    if (!!n->vhost_started == virtio_net_started(n, status) &&
        !nc->peer->link_down) {
        return;
    }

    if (n->vhost_started) {
        vhost_net_stop(vdev, n->nic->ncs, queues);
        n->vhost_started = 0;
        return;
    }

    if (!vhost_net_query(tap_get_vhost_net(nc->peer), vdev)) {
        return;
    }

    /* The flag is raised before the start call and dropped again on error */
    n->vhost_started = 1;
    err = vhost_net_start(vdev, n->nic->ncs, queues);
    if (err < 0) {
        error_report("unable to start vhost net: %d: "
                     "falling back on userspace virtio", -err);
        n->vhost_started = 0;
    }
}

139 140
static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
141
    VirtIONet *n = VIRTIO_NET(vdev);
J
Jason Wang 已提交
142 143 144
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;
145 146 147

    virtio_net_vhost_status(n, status);

J
Jason Wang 已提交
148 149
    for (i = 0; i < n->max_queues; i++) {
        q = &n->vqs[i];
150

J
Jason Wang 已提交
151 152
        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
            queue_status = 0;
153
        } else {
J
Jason Wang 已提交
154
            queue_status = status;
155
        }
J
Jason Wang 已提交
156 157 158 159 160 161 162 163 164 165 166 167

        if (!q->tx_waiting) {
            continue;
        }

        if (virtio_net_started(n, queue_status) && !n->vhost_started) {
            if (q->tx_timer) {
                qemu_mod_timer(q->tx_timer,
                               qemu_get_clock_ns(vm_clock) + n->tx_timeout);
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
168
        } else {
J
Jason Wang 已提交
169 170 171 172 173
            if (q->tx_timer) {
                qemu_del_timer(q->tx_timer);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
174 175 176 177
        }
    }
}

178
/*
 * Mirror the net client's link state into VIRTIO_NET_S_LINK_UP, send a
 * config notification if the status word changed, then re-apply the
 * current virtio status.
 */
static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    const uint16_t before = n->status;

    if (!nc->link_down) {
        n->status |= VIRTIO_NET_S_LINK_UP;
    } else {
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    }

    if (before != n->status) {
        virtio_notify_config(vdev);
    }

    virtio_net_set_status(vdev, vdev->status);
}

195 196
/*
 * Reset the receive filter, queue count, MAC/VLAN tables and MAC address
 * to their defaults.
 */
static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;

    /* multiqueue is disabled by default */
    n->curr_queues = 1;

    /* Flush any MAC filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);

    /* Flush the VLAN filter table */
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Restore the configured MAC address */
    memcpy(n->mac, &n->nic->conf->macaddr, sizeof(n->mac));
}

219
/*
 * Cache in n->has_vnet_hdr whether the tap peer reports a vnet header.
 * Leaves the flag untouched when there is no peer or it is not a tap.
 */
static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);

    if (nc->peer && nc->peer->info->type == NET_CLIENT_OPTIONS_KIND_TAP) {
        n->has_vnet_hdr = tap_has_vnet_hdr(nc->peer);
    }
}
M
Mark McLoughlin 已提交
232

233 234
/* Return the cached n->has_vnet_hdr flag. */
static int peer_has_vnet_hdr(VirtIONet *n)
{
    return (n->has_vnet_hdr);
}

238 239 240 241 242
/*
 * Query the peer for UFO support and cache the answer in n->has_ufo.
 * Returns 0 without querying when the peer has no vnet header.
 */
static int peer_has_ufo(VirtIONet *n)
{
    if (peer_has_vnet_hdr(n)) {
        n->has_ufo = tap_has_ufo(qemu_get_queue(n->nic)->peer);
        return n->has_ufo;
    }

    return 0;
}

248 249
/*
 * Record whether mergeable rx buffers are in use, derive the guest header
 * length from that, and for every queue whose peer accepts that vnet header
 * length, set it on the peer and use it as the host header length too.
 */
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs)
{
    int idx;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (n->mergeable_rx_bufs) {
        n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
    } else {
        n->guest_hdr_len = sizeof(struct virtio_net_hdr);
    }

    for (idx = 0; idx < n->max_queues; idx++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, idx);

        if (!peer_has_vnet_hdr(n)) {
            continue;
        }
        if (!tap_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            continue;
        }

        tap_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
        n->host_hdr_len = n->guest_hdr_len;
    }
}

J
Jason Wang 已提交
269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313
/*
 * Enable the tap peer of subqueue 'index'.
 * Returns 0 when there is no peer or it is not a tap, otherwise the result
 * of tap_enable().
 */
static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (nc->peer && nc->peer->info->type == NET_CLIENT_OPTIONS_KIND_TAP) {
        return tap_enable(nc->peer);
    }

    return 0;
}

/*
 * Disable the tap peer of subqueue 'index'.
 * Returns 0 when there is no peer or it is not a tap, otherwise the result
 * of tap_disable().
 */
static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (nc->peer && nc->peer->info->type == NET_CLIENT_OPTIONS_KIND_TAP) {
        return tap_disable(nc->peer);
    }

    return 0;
}

/*
 * Attach the peers of the first n->curr_queues subqueues and detach the
 * peers of the remaining ones (up to n->max_queues).
 *
 * The attach/detach calls are made outside of assert() so that they still
 * happen when assertions are compiled out with NDEBUG; only the check of
 * their result is an assertion.
 */
static void virtio_net_set_queues(VirtIONet *n)
{
    int i;
    int r;

    for (i = 0; i < n->max_queues; i++) {
        if (i < n->curr_queues) {
            r = peer_attach(n, i);
        } else {
            r = peer_detach(n, i);
        }
        assert(!r);
        (void)r; /* keep NDEBUG builds free of unused-value warnings */
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int ctrl);

314
/*
 * Compute the feature bits to offer: always add MAC, strip checksum/TSO/ECN
 * offloads when the peer has no vnet header, strip UFO when the peer lacks
 * it, and finally let vhost-net (if a tap peer has one) filter the result.
 */
static uint32_t virtio_net_get_features(VirtIODevice *vdev, uint32_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);
    const uint32_t vnet_hdr_offloads =
        (0x1 << VIRTIO_NET_F_CSUM) |
        (0x1 << VIRTIO_NET_F_HOST_TSO4) |
        (0x1 << VIRTIO_NET_F_HOST_TSO6) |
        (0x1 << VIRTIO_NET_F_HOST_ECN) |
        (0x1 << VIRTIO_NET_F_GUEST_CSUM) |
        (0x1 << VIRTIO_NET_F_GUEST_TSO4) |
        (0x1 << VIRTIO_NET_F_GUEST_TSO6) |
        (0x1 << VIRTIO_NET_F_GUEST_ECN);
    const uint32_t ufo_offloads =
        (0x1 << VIRTIO_NET_F_GUEST_UFO) |
        (0x1 << VIRTIO_NET_F_HOST_UFO);

    features |= (1 << VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        features &= ~vnet_hdr_offloads;
    }

    /* peer_has_ufo() is only consulted when a vnet header is present */
    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        features &= ~ufo_offloads;
    }

    if (!nc->peer ||
        nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP ||
        !tap_get_vhost_net(nc->peer)) {
        return features;
    }

    return vhost_net_get_features(tap_get_vhost_net(nc->peer), features);
}

347 348 349 350 351 352 353
/*
 * Return the fixed feature set listed below; 'vdev' is not consulted.
 */
static uint32_t virtio_net_bad_features(VirtIODevice *vdev)
{
    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    const uint32_t features = (1 << VIRTIO_NET_F_MAC) |
                              (1 << VIRTIO_NET_F_CSUM) |
                              (1 << VIRTIO_NET_F_HOST_TSO4) |
                              (1 << VIRTIO_NET_F_HOST_TSO6) |
                              (1 << VIRTIO_NET_F_HOST_ECN);

    return features;
}

A
aliguori 已提交
362 363
/*
 * Apply the feature bits passed in 'features': configure multiqueue and
 * mergeable rx buffers, program the tap offloads of queue 0 when a vnet
 * header is present, and acknowledge the features to every vhost-net peer.
 */
static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int idx;

    virtio_net_set_multiqueue(n, !!(features & (1 << VIRTIO_NET_F_MQ)),
                              !!(features & (1 << VIRTIO_NET_F_CTRL_VQ)));

    virtio_net_set_mrg_rx_bufs(n, !!(features & (1 << VIRTIO_NET_F_MRG_RXBUF)));

    if (n->has_vnet_hdr) {
        tap_set_offload(qemu_get_subqueue(n->nic, 0)->peer,
                        (features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
                        (features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
                        (features >> VIRTIO_NET_F_GUEST_TSO6) & 1,
                        (features >> VIRTIO_NET_F_GUEST_ECN)  & 1,
                        (features >> VIRTIO_NET_F_GUEST_UFO)  & 1);
    }

    for (idx = 0; idx < n->max_queues; idx++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, idx);

        if (nc->peer &&
            nc->peer->info->type == NET_CLIENT_OPTIONS_KIND_TAP &&
            tap_get_vhost_net(nc->peer)) {
            vhost_net_ack_features(tap_get_vhost_net(nc->peer), features);
        }
    }
}

394
/*
 * Handle a VIRTIO_NET_CTRL_RX command: read a one-byte on/off value from
 * 'iov' and store it in the receive-mode flag selected by 'cmd'.
 *
 * Returns VIRTIO_NET_OK, or VIRTIO_NET_ERR on a short payload or an
 * unknown command.
 */
static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;

    if (iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on)) != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    switch (cmd) {
    case VIRTIO_NET_CTRL_RX_PROMISC:
        n->promisc = on;
        break;
    case VIRTIO_NET_CTRL_RX_ALLMULTI:
        n->allmulti = on;
        break;
    case VIRTIO_NET_CTRL_RX_ALLUNI:
        n->alluni = on;
        break;
    case VIRTIO_NET_CTRL_RX_NOMULTI:
        n->nomulti = on;
        break;
    case VIRTIO_NET_CTRL_RX_NOUNI:
        n->nouni = on;
        break;
    case VIRTIO_NET_CTRL_RX_NOBCAST:
        n->nobcast = on;
        break;
    default:
        return VIRTIO_NET_ERR;
    }

    return VIRTIO_NET_OK;
}

424
/*
 * Handle a VIRTIO_NET_CTRL_MAC command.
 *
 * VIRTIO_NET_CTRL_MAC_ADDR_SET: the payload must be exactly one MAC
 * address, which replaces n->mac.
 *
 * VIRTIO_NET_CTRL_MAC_TABLE_SET: the payload holds two lists, each a
 * 32-bit entry count followed by that many MAC addresses.  The first list
 * is stored at the start of n->mac_table.macs, the second directly after
 * it; n->mac_table.first_multi marks the boundary and in_use the total.
 * A list that does not fit is not stored and sets the matching overflow
 * flag instead.
 *
 * @n:       device state
 * @cmd:     command within the MAC class
 * @iov:     payload following the control header
 * @iov_cnt: number of elements in @iov
 *
 * Returns VIRTIO_NET_OK on success, VIRTIO_NET_ERR on an unknown command
 * or malformed payload.  The table has already been cleared when a
 * TABLE_SET payload is rejected.
 */
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    struct virtio_net_ctrl_mac mac_data;
    size_t s;

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.uni_overflow = 0;
    n->mac_table.multi_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);

    /* First list */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    if (s != sizeof(mac_data.entries)) {
        return VIRTIO_NET_ERR;
    }
    /* Only convert the count once it is known to have been read in full */
    mac_data.entries = ldl_p(&mac_data.entries);
    iov_discard_front(&iov, &iov_cnt, s);

    /* Widen before multiplying so a huge count cannot wrap around */
    if ((uint64_t)mac_data.entries * ETH_ALEN > iov_size(iov, iov_cnt)) {
        return VIRTIO_NET_ERR;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, n->mac_table.macs,
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            return VIRTIO_NET_ERR;
        }
        n->mac_table.in_use += mac_data.entries;
    } else {
        n->mac_table.uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, mac_data.entries * ETH_ALEN);

    n->mac_table.first_multi = n->mac_table.in_use;

    /* Second list */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    if (s != sizeof(mac_data.entries)) {
        return VIRTIO_NET_ERR;
    }
    mac_data.entries = ldl_p(&mac_data.entries);

    iov_discard_front(&iov, &iov_cnt, s);

    if ((uint64_t)mac_data.entries * ETH_ALEN != iov_size(iov, iov_cnt)) {
        return VIRTIO_NET_ERR;
    }

    if (n->mac_table.in_use + mac_data.entries <= MAC_TABLE_ENTRIES) {
        /*
         * Append after the entries of the first list; writing at the
         * start of the table would overwrite them.
         */
        s = iov_to_buf(iov, iov_cnt, 0,
                       &n->mac_table.macs[n->mac_table.in_use * ETH_ALEN],
                       mac_data.entries * ETH_ALEN);
        if (s != mac_data.entries * ETH_ALEN) {
            return VIRTIO_NET_ERR;
        }
        n->mac_table.in_use += mac_data.entries;
    } else {
        n->mac_table.multi_overflow = 1;
    }

    return VIRTIO_NET_OK;
}

504
/*
 * Handle a VIRTIO_NET_CTRL_VLAN command: read a 16-bit VLAN id from 'iov'
 * and set (ADD) or clear (DEL) its bit in the n->vlans bitmap.
 *
 * Returns VIRTIO_NET_OK, or VIRTIO_NET_ERR on a short payload, an id of
 * MAX_VLAN or above, or an unknown command.
 */
static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    uint16_t vid;
    size_t s;

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }
    /* Convert only after the short-read check, so vid is fully written */
    vid = lduw_p(&vid);

    if (vid >= MAX_VLAN) {
        return VIRTIO_NET_ERR;
    }

    /* 32 ids per bitmap word: word index is vid >> 5, bit is vid & 0x1f */
    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) {
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    } else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) {
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    } else {
        return VIRTIO_NET_ERR;
    }

    return VIRTIO_NET_OK;
}

J
Jason Wang 已提交
529
/*
 * Handle a VIRTIO_NET_CTRL_MQ command: read the requested number of queue
 * pairs, validate it, store it in n->curr_queues and re-apply the device
 * status and the peer attach/detach state.
 *
 * Returns VIRTIO_NET_OK, or VIRTIO_NET_ERR on a short payload, an unknown
 * command, an out-of-range count or when multiqueue is off.
 */
static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mq mq;
    uint16_t queues;

    if (iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq)) != sizeof(mq)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        return VIRTIO_NET_ERR;
    }

    queues = lduw_p(&mq.virtqueue_pairs);

    if (!n->multiqueue ||
        queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queues > n->max_queues) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queues = queues;

    /* stop the backend before changing the number of queues to avoid handling a
     * disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queues(n);

    return VIRTIO_NET_OK;
}
563 564
/*
 * Process every pending request on the control virtqueue.
 *
 * Each request starts with a struct virtio_net_ctrl_hdr in its out
 * buffers; the remaining out data is handed to the handler for the
 * header's class.  The resulting ack byte is written to the in buffers and
 * the element is pushed back.  A request whose buffers are too small for
 * the header or the ack is fatal (exit(1)).
 */
static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status;
    VirtQueueElement elem;
    size_t s;
    struct iovec *iov;
    unsigned int iov_cnt;

    while (virtqueue_pop(vq, &elem)) {
        if (iov_size(elem.in_sg, elem.in_num) < sizeof(status) ||
            iov_size(elem.out_sg, elem.out_num) < sizeof(ctrl)) {
            error_report("virtio-net ctrl missing headers");
            exit(1);
        }

        /*
         * Every request starts out as failed.  Without this reset a
         * request with an unknown class would report whatever status the
         * previous request in this loop ended with.
         */
        status = VIRTIO_NET_ERR;

        iov = elem.out_sg;
        iov_cnt = elem.out_num;
        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
        if (s != sizeof(ctrl)) {
            status = VIRTIO_NET_ERR;
        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
        }

        s = iov_from_buf(elem.in_sg, elem.in_num, 0, &status, sizeof(status));
        assert(s == sizeof(status));

        virtqueue_push(vq, &elem, sizeof(status));
        virtio_notify(vdev, vq);
    }
}

A
aliguori 已提交
604 605 606 607
/* RX */

/*
 * Receive virtqueue handler: flush the packets queued for the subqueue
 * that 'vq' maps to.
 */
static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *subqueue;

    subqueue = qemu_get_subqueue(n->nic, vq2q(virtio_get_queue_index(vq)));
    qemu_flush_queued_packets(subqueue);
}

614
/*
 * Return 1 when a packet may be delivered on 'nc': the VM is running, the
 * queue is below curr_queues, its rx virtqueue is ready and the driver has
 * set DRIVER_OK.  Return 0 otherwise.
 */
static int virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (vdev->vm_running &&
        nc->queue_index < n->curr_queues &&
        virtio_queue_ready(q->rx_vq) &&
        (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return 1;
    }

    return 0;
}

636
/*
 * Check whether the rx virtqueue can take a packet of 'bufsize' bytes.
 * With mergeable rx buffers the available byte count is checked; otherwise
 * the queue only has to be non-empty.
 *
 * Returns 1 with notification switched off when buffers are available,
 * 0 with notification left on when they are not.
 */
static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    bool starved;

    starved = virtio_queue_empty(q->rx_vq) ||
              (n->mergeable_rx_bufs &&
               !virtqueue_avail_bytes(q->rx_vq, bufsize, 0));

    if (starved) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        starved = virtio_queue_empty(q->rx_vq) ||
                  (n->mergeable_rx_bufs &&
                   !virtqueue_avail_bytes(q->rx_vq, bufsize, 0));
        if (starved) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}

A
Anthony Liguori 已提交
659 660 661 662 663 664 665 666 667 668 669 670 671 672 673
/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
M
Michael S. Tsirkin 已提交
674
                                        uint8_t *buf, size_t size)
A
Anthony Liguori 已提交
675 676 677 678 679 680
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
M
Michael S. Tsirkin 已提交
681
        net_checksum_calculate(buf, size);
A
Anthony Liguori 已提交
682 683 684 685
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}

686 687
/*
 * Write the virtio-net header of an incoming packet into 'iov'.
 *
 * With a vnet header from the peer, the header at the start of 'buf' is
 * copied (after the dhclient checksum workaround had a chance to modify
 * it); otherwise a header with no flags and no GSO is generated.
 */
static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (!n->has_vnet_hdr) {
        struct virtio_net_hdr blank = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };

        iov_from_buf(iov, iov_cnt, 0, &blank, sizeof blank);
        return;
    }

    /* FIXME this cast is evil */
    uint8_t *wbuf = (uint8_t *)buf;

    work_around_broken_dhclient((struct virtio_net_hdr *)wbuf,
                                wbuf + n->host_hdr_len,
                                size - n->host_hdr_len);
    iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
}

704 705 706
/*
 * Decide whether an incoming packet passes the receive filters.
 * 'buf' starts with the host header; the Ethernet frame follows it.
 *
 * Returns 1 to accept the packet, 0 to drop it.
 */
static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *frame;
    int i;

    if (n->promisc) {
        return 1;
    }

    frame = (uint8_t *)buf + n->host_hdr_len;

    /* VLAN-tagged frame: the id must be enabled in the vlans bitmap */
    if (memcmp(&frame[12], vlan, sizeof(vlan)) == 0) {
        int vid = be16_to_cpup((uint16_t *)(frame + 14)) & 0xfff;

        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f)))) {
            return 0;
        }
    }

    if (frame[0] & 1) {
        /* multicast */
        if (memcmp(frame, bcast, sizeof(bcast)) == 0) {
            return !n->nobcast;
        }
        if (n->nomulti) {
            return 0;
        }
        if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (memcmp(frame, &n->mac_table.macs[i * ETH_ALEN],
                       ETH_ALEN) == 0) {
                return 1;
            }
        }
        return 0;
    }

    /* unicast */
    if (n->nouni) {
        return 0;
    }
    if (n->alluni || n->mac_table.uni_overflow) {
        return 1;
    }
    if (memcmp(frame, n->mac, ETH_ALEN) == 0) {
        return 1;
    }

    for (i = 0; i < n->mac_table.first_multi; i++) {
        if (memcmp(frame, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN) == 0) {
            return 1;
        }
    }

    return 0;
}

755
/*
 * Deliver one packet from the backend into the guest's rx virtqueue.
 *
 * Returns -1 when the device cannot receive or no element could be popped
 * for the first buffer, 0 when the queue lacks buffers for the packet, and
 * 'size' when the packet was delivered, filtered out, or dropped because
 * it did not fit into a single non-mergeable buffer.
 */
static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t consumed = 0;    /* bytes of buf already handled */
    size_t filled = 0;      /* number of guest buffers used so far */
    size_t guest_offset;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size)) {
        return size;
    }

    while (consumed < size) {
        VirtQueueElement elem;
        int copied;
        int written = 0;
        const struct iovec *sg = elem.in_sg;

        if (virtqueue_pop(q->rx_vq, &elem) == 0) {
            if (filled == 0) {
                return -1;
            }
            error_report("virtio-net unexpected empty queue: "
                    "i %zd mergeable %d offset %zd, size %zd, "
                    "guest hdr len %zd, host hdr len %zd guest features 0x%x",
                    filled, n->mergeable_rx_bufs, consumed, size,
                    n->guest_hdr_len, n->host_hdr_len, vdev->guest_features);
            exit(1);
        }

        if (elem.in_num < 1) {
            error_report("virtio-net receive queue contains no in buffers");
            exit(1);
        }

        guest_offset = 0;
        if (filled == 0) {
            assert(consumed == 0);
            if (n->mergeable_rx_bufs) {
                /* Remember where num_buffers lives; it is patched below */
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem.in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem.in_num, buf, size);
            consumed = n->host_hdr_len;
            written += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        }

        /* copy in packet.  ugh */
        copied = iov_from_buf(sg, elem.in_num, guest_offset,
                              buf + consumed, size - consumed);
        written += copied;
        consumed += copied;

        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && consumed < size) {
#if 0
            error_report("virtio-net truncated non-mergeable packet: "
                         "i %zd mergeable %d offset %zd, size %zd, "
                         "guest hdr len %zd, host hdr len %zd",
                         filled, n->mergeable_rx_bufs,
                         consumed, size, n->guest_hdr_len, n->host_hdr_len);
#endif
            return size;
        }

        /* signal other side */
        virtqueue_fill(q->rx_vq, &elem, written, filled);
        filled++;
    }

    if (mhdr_cnt) {
        stw_p(&mhdr.num_buffers, filled);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    virtqueue_flush(q->rx_vq, filled);
    virtio_notify(vdev, q->rx_vq);

    return size;
}

855
static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
856

857
/*
 * Completion callback for an asynchronous transmit: push the pending
 * element back to the guest, clear the async state, re-enable tx
 * notification and flush whatever else is queued.
 */
static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
{
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    virtqueue_push(q->tx_vq, &q->async_tx.elem, 0);
    virtio_notify(vdev, q->tx_vq);

    /* Mark the async slot as free again */
    q->async_tx.len = 0;
    q->async_tx.elem.out_num = 0;

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}

A
aliguori 已提交
872
/* TX */
873
/*
 * Pop packets from the TX virtqueue of 'q' and hand them to the backend.
 *
 * Returns:
 *   - the number of packets sent and completed by this call; the loop stops
 *     once that count reaches n->tx_burst;
 *   - 0 if the driver has not set DRIVER_OK, or if an earlier asynchronous
 *     send is still outstanding (guest notifications are disabled then);
 *   - -EBUSY if the backend returned 0 for a packet, i.e. it will complete
 *     it later through virtio_net_tx_complete().
 */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement elem;
    int32_t sent = 0;
    int pair = vq2q(virtio_get_queue_index(q->tx_vq));

    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return sent;
    }

    assert(vdev->vm_running);

    /* A previously popped element is still parked in async_tx. */
    if (q->async_tx.elem.out_num) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return sent;
    }

    while (virtqueue_pop(q->tx_vq, &elem)) {
        struct iovec trimmed[VIRTQUEUE_MAX_SIZE];
        struct iovec *iov = &elem.out_sg[0];
        unsigned int iov_cnt = elem.out_num;
        ssize_t hdr_len;
        ssize_t ret;

        if (iov_cnt < 1) {
            error_report("virtio-net header not in first element");
            exit(1);
        }

        /*
         * When host and guest header lengths match, the guest buffers are
         * forwarded untouched.  Otherwise build a new vector holding the
         * first host_hdr_len bytes followed by everything after the guest
         * header.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            unsigned cnt;

            cnt = iov_copy(trimmed, ARRAY_SIZE(trimmed),
                           iov, iov_cnt,
                           0, n->host_hdr_len);
            cnt += iov_copy(trimmed + cnt, ARRAY_SIZE(trimmed) - cnt,
                            iov, iov_cnt,
                            n->guest_hdr_len, -1);
            iov = trimmed;
            iov_cnt = cnt;
        }

        hdr_len = n->guest_hdr_len;

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, pair),
                                      iov, iov_cnt, virtio_net_tx_complete);
        if (ret == 0) {
            /* Park the element until virtio_net_tx_complete() runs. */
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            q->async_tx.len  = hdr_len;
            return -EBUSY;
        }

        virtqueue_push(q->tx_vq, &elem, 0);
        virtio_notify(vdev, q->tx_vq);

        sent++;
        if (sent >= n->tx_burst) {
            break;
        }
    }

    return sent;
}

942
/*
 * TX virtqueue handler registered for queue pairs that use a timer
 * (see the virtio_add_queue() calls that pass this function).
 *
 * If no flush is pending, arm the timer tx_timeout ns from now and disable
 * guest notifications.  If a flush is already pending, cancel the timer and
 * flush immediately.
 */
static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int pair = vq2q(virtio_get_queue_index(vq));
    VirtIONetQueue *q = &n->vqs[pair];

    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        q->tx_waiting = 1;
        return;
    }

    if (!q->tx_waiting) {
        /* Nothing pending yet: defer the flush to the timer. */
        qemu_mod_timer(q->tx_timer,
                       qemu_get_clock_ns(vm_clock) + n->tx_timeout);
        q->tx_waiting = 1;
        virtio_queue_set_notification(vq, 0);
        return;
    }

    /* A flush was already pending: do it now instead of waiting. */
    virtio_queue_set_notification(vq, 1);
    qemu_del_timer(q->tx_timer);
    q->tx_waiting = 0;
    virtio_net_flush_tx(q);
}

966 967
/*
 * TX virtqueue handler registered for queue pairs that use a bottom half.
 *
 * Marks the queue as waiting and, if the VM is running, disables guest
 * notifications and schedules q->tx_bh.  Does nothing when a flush is
 * already pending.
 */
static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int pair = vq2q(virtio_get_queue_index(vq));
    VirtIONetQueue *q = &n->vqs[pair];

    if (unlikely(q->tx_waiting)) {
        return;
    }
    q->tx_waiting = 1;

    /*
     * The VM may be stopped while the VCPU is not; in that case only
     * record that work is pending.
     */
    if (vdev->vm_running) {
        virtio_queue_set_notification(vq, 0);
        qemu_bh_schedule(q->tx_bh);
    }
}

A
aliguori 已提交
983 984
static void virtio_net_tx_timer(void *opaque)
{
985 986
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
987 988
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    assert(vdev->vm_running);
A
aliguori 已提交
989

990
    q->tx_waiting = 0;
A
aliguori 已提交
991 992

    /* Just in case the driver is not ready on more */
993
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
A
aliguori 已提交
994
        return;
995
    }
A
aliguori 已提交
996

997 998
    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
A
aliguori 已提交
999 1000
}

1001 1002
static void virtio_net_tx_bh(void *opaque)
{
1003 1004
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
1005
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1006 1007
    int32_t ret;

1008
    assert(vdev->vm_running);
1009

1010
    q->tx_waiting = 0;
1011 1012

    /* Just in case the driver is not ready on more */
1013
    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
1014
        return;
1015
    }
1016

1017
    ret = virtio_net_flush_tx(q);
1018 1019 1020 1021 1022 1023 1024
    if (ret == -EBUSY) {
        return; /* Notification re-enable handled by tx_complete */
    }

    /* If we flush a full burst of packets, assume there are
     * more coming and immediately reschedule */
    if (ret >= n->tx_burst) {
1025 1026
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
1027 1028 1029 1030 1031 1032
        return;
    }

    /* If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking.  If
     * we find something, assume the guest is still active and reschedule */
1033 1034 1035 1036 1037
    virtio_queue_set_notification(q->tx_vq, 1);
    if (virtio_net_flush_tx(q) > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
1038 1039 1040
    }
}

J
Jason Wang 已提交
1041 1042
/*
 * Rebuild the virtqueue layout for single-queue or multiqueue operation.
 *
 * @n:          the device
 * @multiqueue: non-zero to create all n->max_queues RX/TX pairs, zero to
 *              keep only pair 0
 * @ctrl:       non-zero to (re)create the control virtqueue after the
 *              data queues
 *
 * Pair 0 (virtqueue indices 0 and 1) is never touched.  Every other index
 * this device can own is deleted and the requested queues are added again.
 */
static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int ctrl)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int i, max = multiqueue ? n->max_queues : 1;
    /*
     * Pair 0 is set up once at device init and gets a timer only in
     * tx=timer mode, so it tells us which TX mode the device uses.  The
     * per-pair field must not be consulted for this: it is still NULL for
     * a pair that has never been set up, which would wrongly select bh
     * mode for every additional pair.
     */
    bool use_timer = n->vqs[0].tx_timer != NULL;

    n->multiqueue = multiqueue;

    /*
     * Data queues beyond pair 0 occupy indices 2 .. max_queues * 2 - 1 and
     * the control queue follows them, so max_queues * 2 is the highest
     * index that can be in use.
     */
    for (i = 2; i <= n->max_queues * 2; i++) {
        virtio_del_queue(vdev, i);
    }

    for (i = 1; i < max; i++) {
        VirtIONetQueue *q = &n->vqs[i];

        q->rx_vq = virtio_add_queue(vdev, 256, virtio_net_handle_rx);
        if (use_timer) {
            q->tx_vq = virtio_add_queue(vdev, 256, virtio_net_handle_tx_timer);
            /* Reuse the timer from an earlier call instead of leaking it. */
            if (!q->tx_timer) {
                q->tx_timer = qemu_new_timer_ns(vm_clock,
                                                virtio_net_tx_timer, q);
            }
        } else {
            q->tx_vq = virtio_add_queue(vdev, 256, virtio_net_handle_tx_bh);
            /* Reuse the bottom half from an earlier call, same reason. */
            if (!q->tx_bh) {
                q->tx_bh = qemu_bh_new(virtio_net_tx_bh, q);
            }
        }

        q->tx_waiting = 0;
        q->n = n;
    }

    if (ctrl) {
        n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
    }

    virtio_net_set_queues(n);
}

A
aliguori 已提交
1077 1078
/*
 * Write the device state to the migration stream 'f'; 'opaque' is the
 * VirtIONet.  The field order is the wire format consumed by
 * virtio_net_load() and must not change.
 */
static void virtio_net_save(QEMUFile *f, void *opaque)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int pair;

    /* At this point, backend must be stopped, otherwise
     * it might keep writing to memory. */
    assert(!n->vhost_started);

    /* Generic virtio state first, then the net-specific fields. */
    virtio_save(vdev, f);

    qemu_put_buffer(f, n->mac, ETH_ALEN);
    qemu_put_be32(f, n->vqs[0].tx_waiting);
    qemu_put_be32(f, n->mergeable_rx_bufs);
    qemu_put_be16(f, n->status);

    /* RX filter state */
    qemu_put_byte(f, n->promisc);
    qemu_put_byte(f, n->allmulti);
    qemu_put_be32(f, n->mac_table.in_use);
    qemu_put_buffer(f, n->mac_table.macs, n->mac_table.in_use * ETH_ALEN);
    qemu_put_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);

    qemu_put_be32(f, n->has_vnet_hdr);

    qemu_put_byte(f, n->mac_table.multi_overflow);
    qemu_put_byte(f, n->mac_table.uni_overflow);

    qemu_put_byte(f, n->alluni);
    qemu_put_byte(f, n->nomulti);
    qemu_put_byte(f, n->nouni);
    qemu_put_byte(f, n->nobcast);

    qemu_put_byte(f, n->has_ufo);

    /* The multiqueue section exists only for multi-pair devices. */
    if (n->max_queues <= 1) {
        return;
    }

    qemu_put_be16(f, n->max_queues);
    qemu_put_be16(f, n->curr_queues);
    /* Pair 0's tx_waiting was already written above. */
    for (pair = 1; pair < n->curr_queues; pair++) {
        qemu_put_be32(f, n->vqs[pair].tx_waiting);
    }
}

/*
 * Restore the device state from the migration stream 'f'.
 *
 * @f:          stream positioned at this device's section
 * @opaque:     the VirtIONet being restored
 * @version_id: format version of the saved section; versions 2 up to
 *              VIRTIO_NET_VM_VERSION are accepted
 *
 * Returns 0 on success, -EINVAL for an unsupported version, the error from
 * virtio_load(), or -1 when the saved state cannot be honoured by this
 * device or its peer.
 *
 * The stream is untrusted input: every count read from it is validated
 * before it is used as a length or an array bound.
 */
static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int ret, i, link_down;

    if (version_id < 2 || version_id > VIRTIO_NET_VM_VERSION) {
        return -EINVAL;
    }

    ret = virtio_load(vdev, f);
    if (ret) {
        return ret;
    }

    qemu_get_buffer(f, n->mac, ETH_ALEN);
    n->vqs[0].tx_waiting = qemu_get_be32(f);

    virtio_net_set_mrg_rx_bufs(n, qemu_get_be32(f));

    if (version_id >= 3) {
        n->status = qemu_get_be16(f);
    }

    if (version_id >= 4) {
        /* These flags were stored as 32-bit words before version 8. */
        if (version_id < 8) {
            n->promisc = qemu_get_be32(f);
            n->allmulti = qemu_get_be32(f);
        } else {
            n->promisc = qemu_get_byte(f);
            n->allmulti = qemu_get_byte(f);
        }
    }

    if (version_id >= 5) {
        /*
         * Keep the count in an unsigned local so that a value with the top
         * bit set cannot pass the bound check as a negative number.
         */
        uint32_t saved_in_use = qemu_get_be32(f);

        /* MAC_TABLE_ENTRIES may be different from the saved image */
        if (saved_in_use <= MAC_TABLE_ENTRIES) {
            n->mac_table.in_use = saved_in_use;
            qemu_get_buffer(f, n->mac_table.macs, saved_in_use * ETH_ALEN);
        } else {
            /*
             * The saved table does not fit.  Consume the entries one at a
             * time into a fixed-size scratch buffer (no allocation and no
             * multiplication that could overflow) and fall back to the
             * overflow flags instead of keeping the addresses.
             */
            uint8_t discard[ETH_ALEN];
            uint32_t k;

            for (k = 0; k < saved_in_use; k++) {
                qemu_get_buffer(f, discard, ETH_ALEN);
            }
            n->mac_table.multi_overflow = n->mac_table.uni_overflow = 1;
            n->mac_table.in_use = 0;
        }
    }

    if (version_id >= 6) {
        qemu_get_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);
    }

    if (version_id >= 7) {
        if (qemu_get_be32(f) && !peer_has_vnet_hdr(n)) {
            error_report("virtio-net: saved image requires vnet_hdr=on");
            return -1;
        }

        if (n->has_vnet_hdr) {
            /* Re-apply the offloads the guest had negotiated. */
            tap_set_offload(qemu_get_queue(n->nic)->peer,
                    (vdev->guest_features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
                    (vdev->guest_features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
                    (vdev->guest_features >> VIRTIO_NET_F_GUEST_TSO6) & 1,
                    (vdev->guest_features >> VIRTIO_NET_F_GUEST_ECN)  & 1,
                    (vdev->guest_features >> VIRTIO_NET_F_GUEST_UFO)  & 1);
        }
    }

    if (version_id >= 9) {
        n->mac_table.multi_overflow = qemu_get_byte(f);
        n->mac_table.uni_overflow = qemu_get_byte(f);
    }

    if (version_id >= 10) {
        n->alluni = qemu_get_byte(f);
        n->nomulti = qemu_get_byte(f);
        n->nouni = qemu_get_byte(f);
        n->nobcast = qemu_get_byte(f);
    }

    if (version_id >= 11) {
        if (qemu_get_byte(f) && !peer_has_ufo(n)) {
            error_report("virtio-net: saved image requires TUN_F_UFO support");
            return -1;
        }
    }

    if (n->max_queues > 1) {
        if (n->max_queues != qemu_get_be16(f)) {
            error_report("virtio-net: different max_queues ");
            return -1;
        }

        n->curr_queues = qemu_get_be16(f);
        /* n->vqs[] has max_queues entries; never index beyond that. */
        if (n->curr_queues > n->max_queues) {
            error_report("virtio-net: curr_queues %d > max_queues %d",
                         (int)n->curr_queues, (int)n->max_queues);
            return -1;
        }
        for (i = 1; i < n->curr_queues; i++) {
            n->vqs[i].tx_waiting = qemu_get_be32(f);
        }
    }

    virtio_net_set_queues(n);

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in n->status */
    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
    for (i = 0; i < n->max_queues; i++) {
        qemu_get_subqueue(n->nic, i)->link_down = link_down;
    }

    return 0;
}

1231
/*
 * NetClientInfo .cleanup hook: drop the device's reference to its NIC.
 */
static void virtio_net_cleanup(NetClientState *nc)
{
    VirtIONet *dev;

    dev = qemu_get_nic_opaque(nc);
    dev->nic = NULL;
}

M
Mark McLoughlin 已提交
1238
static NetClientInfo net_virtio_info = {
1239
    .type = NET_CLIENT_OPTIONS_KIND_NIC,
M
Mark McLoughlin 已提交
1240 1241 1242 1243 1244 1245 1246
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
        .cleanup = virtio_net_cleanup,
    .link_status_changed = virtio_net_set_link_status,
};

1247 1248
/*
 * Ask the vhost backend of the queue pair that owns virtqueue 'idx' whether
 * that virtqueue has a pending notification.  Only valid while vhost is
 * running.
 */
static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int pair = vq2q(idx);
    NetClientState *nc = qemu_get_subqueue(n->nic, pair);

    assert(n->vhost_started);

    return vhost_net_virtqueue_pending(tap_get_vhost_net(nc->peer), idx);
}

/*
 * Forward a guest-notifier mask/unmask request for virtqueue 'idx' to the
 * vhost backend of the queue pair that owns it.  Only valid while vhost is
 * running.
 */
static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
                                           bool mask)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int pair = vq2q(idx);
    NetClientState *nc = qemu_get_subqueue(n->nic, pair);

    assert(n->vhost_started);

    vhost_net_virtqueue_mask(tap_get_vhost_net(nc->peer), vdev, idx, mask);
}

1265
/*
 * Set n->config_size to the smallest config space that still contains
 * every field enabled by 'host_features', using the feature_sizes[] table.
 * The result is 0 when none of the listed features is offered.
 */
void virtio_net_set_config_size(VirtIONet *n, uint32_t host_features)
{
    const VirtIOFeature *feat;
    int config_size = 0;

    /* The table is terminated by an entry whose flags are zero. */
    for (feat = feature_sizes; feat->flags != 0; feat++) {
        if (host_features & feat->flags) {
            config_size = MAX(feat->end, config_size);
        }
    }

    n->config_size = config_size;
}

1276
/*
 * VirtioDeviceClass .init hook: set up queue pair 0, the control queue, the
 * NIC backend, the RX filter tables and the savevm handlers.
 *
 * Always returns 0.
 */
static int virtio_net_device_init(VirtIODevice *vdev)
{
    DeviceState *qdev = DEVICE(vdev);
    VirtIONet *n = VIRTIO_NET(vdev);
    const char *tx_mode;
    bool tx_is_timer;
    int i;

    virtio_init(VIRTIO_DEVICE(n), "virtio-net", VIRTIO_ID_NET,
                                  n->config_size);

    /* Allocate state for every pair; only pair 0 is set up here. */
    n->max_queues = MAX(n->nic_conf.queues, 1);
    n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
    n->vqs[0].rx_vq = virtio_add_queue(vdev, 256, virtio_net_handle_rx);
    n->curr_queues = 1;
    n->vqs[0].n = n;
    n->tx_timeout = n->net_conf.txtimer;

    /* "tx" property: "timer" or "bh"; anything else falls back to "bh". */
    tx_mode = n->net_conf.tx;
    tx_is_timer = tx_mode && !strcmp(tx_mode, "timer");
    if (tx_mode && !tx_is_timer && strcmp(tx_mode, "bh")) {
        error_report("virtio-net: "
                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                     n->net_conf.tx);
        error_report("Defaulting to \"bh\"");
    }

    if (tx_is_timer) {
        n->vqs[0].tx_vq = virtio_add_queue(vdev, 256,
                                           virtio_net_handle_tx_timer);
        n->vqs[0].tx_timer = qemu_new_timer_ns(vm_clock, virtio_net_tx_timer,
                                               &n->vqs[0]);
    } else {
        n->vqs[0].tx_vq = virtio_add_queue(vdev, 256,
                                           virtio_net_handle_tx_bh);
        n->vqs[0].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[0]);
    }
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);

    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;

    n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                          object_get_typename(OBJECT(qdev)), qdev->id, n);

    /* Use the virtio-net header on the host side only if the peer can. */
    peer_test_vnet_hdr(n);
    if (peer_has_vnet_hdr(n)) {
        for (i = 0; i < n->max_queues; i++) {
            tap_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
        }
        n->host_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        n->host_hdr_len = 0;
    }

    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);

    n->vqs[0].tx_waiting = 0;
    n->tx_burst = n->net_conf.txburst;
    virtio_net_set_mrg_rx_bufs(n, 0);
    n->promisc = 1; /* for compatibility */

    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    /* One bit per VLAN id. */
    n->vlans = g_malloc0(MAX_VLAN >> 3);

    n->qdev = qdev;
    register_savevm(qdev, "virtio-net", -1, VIRTIO_NET_VM_VERSION,
                    virtio_net_save, virtio_net_load, n);

    add_boot_device_path(n->nic_conf.bootindex, qdev, "/ethernet-phy@0");
    return 0;
}

/*
 * DeviceClass .exit hook: stop the backend and release everything that
 * virtio_net_device_init() and virtio_net_set_multiqueue() set up.
 *
 * Always returns 0.
 */
static int virtio_net_device_exit(DeviceState *qdev)
{
    VirtIONet *n = VIRTIO_NET(qdev);
    VirtIODevice *vdev = VIRTIO_DEVICE(qdev);
    int i;

    /* This will stop vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    unregister_savevm(qdev, "virtio-net", n);

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    for (i = 0; i < n->max_queues; i++) {
        VirtIONetQueue *q = &n->vqs[i];
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        qemu_purge_queued_packets(nc);

        /*
         * n->vqs[] is zero-allocated for max_queues pairs, but pairs other
         * than 0 only get a timer or bottom half once multiqueue has been
         * enabled.  A pair that was never set up has neither, so both
         * pointers must be checked before they are released.
         */
        if (q->tx_timer) {
            qemu_del_timer(q->tx_timer);
            qemu_free_timer(q->tx_timer);
        } else if (q->tx_bh) {
            qemu_bh_delete(q->tx_bh);
        }
    }

    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_cleanup(vdev);

    return 0;
}

/*
 * QOM instance initializer.
 *
 * Starts with the full-size config space, sizeof(struct virtio_net_config);
 * virtio_net_set_config_size() can override this value.
 */
static void virtio_net_instance_init(Object *obj)
{
    VIRTIO_NET(obj)->config_size = sizeof(struct virtio_net_config);
}

/*
 * qdev properties of the device:
 *   - the common NIC properties, stored in nic_conf;
 *   - "x-txtimer": stored in net_conf.txtimer, default TX_TIMER_INTERVAL;
 *   - "x-txburst": stored in net_conf.txburst, default TX_BURST;
 *   - "tx": stored in net_conf.tx.
 */
static Property virtio_net_properties[] = {
    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
                                               TX_TIMER_INTERVAL),
    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
    DEFINE_PROP_END_OF_LIST(),
};

/*
 * QOM class initializer: install the qdev and virtio callbacks implemented
 * in this file.  'data' is unused.
 */
static void virtio_net_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dev_class = DEVICE_CLASS(klass);
    VirtioDeviceClass *virtio_class = VIRTIO_DEVICE_CLASS(klass);

    /* Generic device hooks */
    dev_class->props = virtio_net_properties;
    dev_class->exit = virtio_net_device_exit;

    /* Lifecycle */
    virtio_class->init = virtio_net_device_init;
    virtio_class->reset = virtio_net_reset;
    virtio_class->set_status = virtio_net_set_status;

    /* Config space */
    virtio_class->get_config = virtio_net_get_config;
    virtio_class->set_config = virtio_net_set_config;

    /* Feature negotiation */
    virtio_class->get_features = virtio_net_get_features;
    virtio_class->set_features = virtio_net_set_features;
    virtio_class->bad_features = virtio_net_bad_features;

    /* Guest notifier handling */
    virtio_class->guest_notifier_mask = virtio_net_guest_notifier_mask;
    virtio_class->guest_notifier_pending = virtio_net_guest_notifier_pending;
}

/*
 * QOM type description: TYPE_VIRTIO_NET derives from TYPE_VIRTIO_DEVICE,
 * instances are VirtIONet structures.
 */
static const TypeInfo virtio_net_info = {
    .name = TYPE_VIRTIO_NET,
    .parent = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIONet),
    .instance_init = virtio_net_instance_init,
    .class_init = virtio_net_class_init,
};

/* Register the virtio-net QOM type; invoked through type_init() below. */
static void virtio_register_types(void)
{
    type_register_static(&virtio_net_info);
}

type_init(virtio_register_types)