virtio-net.c 43.9 KB
Newer Older
A
aliguori 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

14
#include "qemu/iov.h"
P
Paolo Bonzini 已提交
15
#include "hw/virtio/virtio.h"
P
Paolo Bonzini 已提交
16
#include "net/net.h"
17
#include "net/checksum.h"
18
#include "net/tap.h"
19 20
#include "qemu/error-report.h"
#include "qemu/timer.h"
P
Paolo Bonzini 已提交
21 22
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
23
#include "hw/virtio/virtio-bus.h"
A
aliguori 已提交
24

25
#define VIRTIO_NET_VM_VERSION    11
26

27
#define MAC_TABLE_ENTRIES    64
28
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
29

30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
/*
 * Size, in bytes, of the leading part of 'container' that ends right after
 * its member 'field': the member's own size plus its offset.
 */
#define endof(container, field) \
    (sizeof(((container *)0)->field) + offsetof(container, field))

/* Pairs a mask of feature bits with a byte count (an "end" offset). */
typedef struct VirtIOFeature {
    uint32_t flags; /* bit mask of feature flags this entry applies to */
    size_t end;     /* byte count associated with those flags */
} VirtIOFeature;

/*
 * For each listed feature bit, the number of bytes of
 * struct virtio_net_config up to and including the named field
 * (computed with endof()).  The table ends with an all-zero entry.
 */
static VirtIOFeature feature_sizes[] = {
    {.flags = 1 << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1 << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1 << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {}
};

J
Jason Wang 已提交
52
/* Return the per-queue state of the device that matches nc's queue index. */
static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *dev = qemu_get_nic_opaque(nc);
    VirtIONetQueue *slot = dev->vqs + nc->queue_index;

    return slot;
}
J
Jason Wang 已提交
58 59 60 61 62 63

/*
 * Convert a virtqueue index into a queue index: every two consecutive
 * virtqueue indices map onto the same result.
 */
static int vq2q(int queue_index)
{
    const int vqs_per_queue = 2;

    return queue_index / vqs_per_queue;
}

A
aliguori 已提交
64 65 66 67
/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

68 69 70
/*
 * Moving to QOM later in this series.
 */
A
aliguori 已提交
71 72 73 74 75
/* Reinterpret a generic VirtIODevice pointer as the VirtIONet it is cast to. */
static VirtIONet *to_virtio_net(VirtIODevice *vdev)
{
    VirtIONet *n = (VirtIONet *)vdev;

    return n;
}

76
/*
 * Fill 'config' with the device configuration: MAC address, link status
 * and maximum number of queue pairs.  Only the first n->config_size bytes
 * of the structure are copied out.
 */
static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    struct virtio_net_config netcfg;
    VirtIONet *n = to_virtio_net(vdev);

    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    stw_p(&netcfg.status, n->status);
    stw_p(&netcfg.max_virtqueue_pairs, n->max_queues);

    memcpy(config, &netcfg, n->config_size);
}

87 88 89
/*
 * Accept a configuration write.  The only field acted upon is the MAC
 * address, and only when VIRTIO_NET_F_CTRL_MAC_ADDR is not set in the
 * guest features and the address actually differs from the current one.
 */
static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = to_virtio_net(vdev);
    struct virtio_net_config netcfg = {};

    memcpy(&netcfg, config, n->config_size);

    if ((n->vdev.guest_features >> VIRTIO_NET_F_CTRL_MAC_ADDR) & 1) {
        return;
    }
    if (memcmp(netcfg.mac, n->mac, ETH_ALEN) == 0) {
        return;
    }

    memcpy(n->mac, netcfg.mac, ETH_ALEN);
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
}

101 102 103
/*
 * True when 'status' has DRIVER_OK set, the device's link is up and the
 * VM is running.
 */
static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return false;
    }
    if (!(n->status & VIRTIO_NET_S_LINK_UP)) {
        return false;
    }
    return n->vdev.vm_running != 0;
}

/*
 * Start or stop vhost for the device so that it follows 'status'.
 * Nothing happens unless the peer is a tap backend with vhost-net
 * attached.  If starting vhost fails, the failure is reported and
 * vhost_started is cleared again.
 */
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    int queues = n->multiqueue ? n->max_queues : 1;
    int r;

    if (!nc->peer ||
        nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP ||
        !tap_get_vhost_net(nc->peer)) {
        return;
    }

    /* Already in the requested state and the peer link is up. */
    if (!!n->vhost_started == virtio_net_started(n, status) &&
        !nc->peer->link_down) {
        return;
    }

    if (n->vhost_started) {
        vhost_net_stop(&n->vdev, n->nic->ncs, queues);
        n->vhost_started = 0;
        return;
    }

    if (!vhost_net_query(tap_get_vhost_net(nc->peer), &n->vdev)) {
        return;
    }

    n->vhost_started = 1;
    r = vhost_net_start(&n->vdev, n->nic->ncs, queues);
    if (r < 0) {
        error_report("unable to start vhost net: %d: "
                     "falling back on userspace virtio", -r);
        n->vhost_started = 0;
    }
}

145 146 147
/*
 * Apply a new device status: update vhost first, then, for every queue
 * that has transmit work pending, either re-arm or cancel its TX timer /
 * bottom half.  Queues beyond the active count (or beyond queue 0 when
 * multiqueue is off) are evaluated with a status of 0.
 */
static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
    VirtIONet *n = to_virtio_net(vdev);
    int i;

    virtio_net_vhost_status(n, status);

    for (i = 0; i < n->max_queues; i++) {
        VirtIONetQueue *q = &n->vqs[i];
        uint8_t queue_status = status;
        bool resume;

        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
            queue_status = 0;
        }

        if (!q->tx_waiting) {
            continue;
        }

        resume = virtio_net_started(n, queue_status) && !n->vhost_started;

        if (q->tx_timer) {
            if (resume) {
                qemu_mod_timer(q->tx_timer,
                               qemu_get_clock_ns(vm_clock) + n->tx_timeout);
            } else {
                qemu_del_timer(q->tx_timer);
            }
        } else {
            if (resume) {
                qemu_bh_schedule(q->tx_bh);
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
        }
    }
}

184
/*
 * Mirror the net client's link state into the device status word, raise a
 * config notification if the word changed, and re-apply the device status.
 */
static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    const uint16_t previous = n->status;

    if (nc->link_down) {
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    } else {
        n->status |= VIRTIO_NET_S_LINK_UP;
    }

    if (previous != n->status) {
        virtio_notify_config(&n->vdev);
    }

    virtio_net_set_status(&n->vdev, n->vdev.status);
}

200 201 202 203 204 205 206
/*
 * Device reset: restore the receive-mode flags, fall back to a single
 * queue, empty the MAC and VLAN filter tables and reload the MAC address
 * from the NIC configuration.
 */
static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = to_virtio_net(vdev);

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = n->alluni = 0;
    n->nomulti = n->nouni = n->nobcast = 0;

    /* multiqueue is disabled by default */
    n->curr_queues = 1;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
}

224
/*
 * Record in n->has_vnet_hdr whether the peer supports a vnet header.
 * The flag is left untouched when there is no peer or the peer is not a
 * tap backend.
 */
static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);

    if (!nc->peer ||
        nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
        return;
    }

    n->has_vnet_hdr = tap_has_vnet_hdr(nc->peer);
}
M
Mark McLoughlin 已提交
237

238 239
/* Return the cached has_vnet_hdr flag of the device. */
static int peer_has_vnet_hdr(VirtIONet *n)
{
    int cached = n->has_vnet_hdr;

    return cached;
}

243 244 245 246 247
/*
 * Query the peer for UFO support and cache the answer in n->has_ufo.
 * Returns 0 without querying when the peer has no vnet header.
 */
static int peer_has_ufo(VirtIONet *n)
{
    if (peer_has_vnet_hdr(n)) {
        n->has_ufo = tap_has_ufo(qemu_get_queue(n->nic)->peer);
        return n->has_ufo;
    }

    return 0;
}

253 254
/*
 * Switch mergeable receive buffers on or off and derive the guest header
 * length from that choice.  For every queue whose peer accepts that
 * header length, the peer is told to use it and the host header length
 * is set to match.
 */
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs)
{
    int i;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (n->mergeable_rx_bufs) {
        n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
    } else {
        n->guest_hdr_len = sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!peer_has_vnet_hdr(n) ||
            !tap_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            continue;
        }

        tap_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
        n->host_hdr_len = n->guest_hdr_len;
    }
}

J
Jason Wang 已提交
274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318
/*
 * Enable the tap peer of sub-queue 'index'.  Returns 0 when there is no
 * peer or the peer is not a tap backend; otherwise returns whatever
 * tap_enable() returns.
 */
static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer ||
        nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
        return 0;
    }

    return tap_enable(nc->peer);
}

/*
 * Disable the tap peer of sub-queue 'index'.  Returns 0 when there is no
 * peer or the peer is not a tap backend; otherwise returns whatever
 * tap_disable() returns.
 */
static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer ||
        nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

/*
 * Attach the backend of every queue below curr_queues and detach the
 * backend of every queue at or above it.  Both operations are expected
 * to succeed (return 0).
 *
 * The attach/detach calls are made outside assert() on purpose: assert()
 * expands to nothing when NDEBUG is defined, which would otherwise
 * silently skip the calls themselves.
 */
static void virtio_net_set_queues(VirtIONet *n)
{
    int i;

    for (i = 0; i < n->max_queues; i++) {
        int r;

        if (i < n->curr_queues) {
            r = peer_attach(n, i);
        } else {
            r = peer_detach(n, i);
        }

        assert(!r);
        (void)r; /* keeps NDEBUG builds free of unused-variable warnings */
    }
}

static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int ctrl);

319
/*
 * Compute the feature bits to offer.  MAC is always offered; checksum,
 * TSO and ECN offloads are removed when the peer has no vnet header, and
 * UFO is removed when the peer lacks a vnet header or UFO support.  If
 * the peer is a tap backend with vhost-net, vhost gets the final say.
 */
static uint32_t virtio_net_get_features(VirtIODevice *vdev, uint32_t features)
{
    VirtIONet *n = to_virtio_net(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    features |= (1 << VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        features &= ~((0x1 << VIRTIO_NET_F_CSUM) |
                      (0x1 << VIRTIO_NET_F_HOST_TSO4) |
                      (0x1 << VIRTIO_NET_F_HOST_TSO6) |
                      (0x1 << VIRTIO_NET_F_HOST_ECN));

        features &= ~((0x1 << VIRTIO_NET_F_GUEST_CSUM) |
                      (0x1 << VIRTIO_NET_F_GUEST_TSO4) |
                      (0x1 << VIRTIO_NET_F_GUEST_TSO6) |
                      (0x1 << VIRTIO_NET_F_GUEST_ECN));
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        features &= ~((0x1 << VIRTIO_NET_F_GUEST_UFO) |
                      (0x1 << VIRTIO_NET_F_HOST_UFO));
    }

    if (!nc->peer ||
        nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP ||
        !tap_get_vhost_net(nc->peer)) {
        return features;
    }

    return vhost_net_get_features(tap_get_vhost_net(nc->peer), features);
}

352 353 354 355 356 357 358
/*
 * Feature set returned for this device by the bad_features hook.
 *
 * Linux kernel 2.6.25.  It understood MAC (as everyone must),
 * but also CSUM, HOST_TSO4, HOST_TSO6 and HOST_ECN.
 */
static uint32_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint32_t features = 0;

    features |= (1 << VIRTIO_NET_F_MAC) |
                (1 << VIRTIO_NET_F_CSUM) |
                (1 << VIRTIO_NET_F_HOST_TSO4) |
                (1 << VIRTIO_NET_F_HOST_TSO6) |
                (1 << VIRTIO_NET_F_HOST_ECN);

    return features;
}

A
aliguori 已提交
367 368 369
/*
 * Apply the feature bits passed in 'features': configure multiqueue and
 * mergeable RX buffers, program the peer's offloads when it has a vnet
 * header, and acknowledge the features to vhost-net on every tap-backed
 * queue that has it.
 */
static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features)
{
    VirtIONet *n = to_virtio_net(vdev);
    int i;

    virtio_net_set_multiqueue(n, !!(features & (1 << VIRTIO_NET_F_MQ)),
                              !!(features & (1 << VIRTIO_NET_F_CTRL_VQ)));

    virtio_net_set_mrg_rx_bufs(n, !!(features & (1 << VIRTIO_NET_F_MRG_RXBUF)));

    if (n->has_vnet_hdr) {
        int csum = (features >> VIRTIO_NET_F_GUEST_CSUM) & 1;
        int tso4 = (features >> VIRTIO_NET_F_GUEST_TSO4) & 1;
        int tso6 = (features >> VIRTIO_NET_F_GUEST_TSO6) & 1;
        int ecn  = (features >> VIRTIO_NET_F_GUEST_ECN)  & 1;
        int ufo  = (features >> VIRTIO_NET_F_GUEST_UFO)  & 1;

        tap_set_offload(qemu_get_subqueue(n->nic, 0)->peer,
                        csum, tso4, tso6, ecn, ufo);
    }

    for (i = 0; i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!nc->peer ||
            nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP ||
            !tap_get_vhost_net(nc->peer)) {
            continue;
        }

        vhost_net_ack_features(tap_get_vhost_net(nc->peer), features);
    }
}

399
/*
 * Handle a VIRTIO_NET_CTRL_RX command: read the one-byte on/off value
 * from the request and store it in the receive-mode flag selected by
 * 'cmd'.  Returns VIRTIO_NET_ERR for a short request or an unknown
 * command, VIRTIO_NET_OK otherwise.
 */
static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t got = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));

    if (got != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    switch (cmd) {
    case VIRTIO_NET_CTRL_RX_PROMISC:
        n->promisc = on;
        break;
    case VIRTIO_NET_CTRL_RX_ALLMULTI:
        n->allmulti = on;
        break;
    case VIRTIO_NET_CTRL_RX_ALLUNI:
        n->alluni = on;
        break;
    case VIRTIO_NET_CTRL_RX_NOMULTI:
        n->nomulti = on;
        break;
    case VIRTIO_NET_CTRL_RX_NOUNI:
        n->nouni = on;
        break;
    case VIRTIO_NET_CTRL_RX_NOBCAST:
        n->nobcast = on;
        break;
    default:
        return VIRTIO_NET_ERR;
    }

    return VIRTIO_NET_OK;
}

429
/*
 * Handle a VIRTIO_NET_CTRL_MAC command.
 *
 * VIRTIO_NET_CTRL_MAC_ADDR_SET: the payload must be exactly one MAC
 * address, which replaces n->mac.
 *
 * VIRTIO_NET_CTRL_MAC_TABLE_SET: the payload is a 32-bit entry count
 * followed by that many unicast addresses, then a second 32-bit count
 * followed by that many multicast addresses.  The filter table is cleared
 * first.  Unicast entries are stored at the start of the table and
 * multicast entries directly after them (first_multi marks the boundary).
 * A list that does not fit sets the matching overflow flag instead of
 * being stored.
 *
 * Returns VIRTIO_NET_OK on success and VIRTIO_NET_ERR for an unknown
 * command or a malformed payload.
 */
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    size_t avail;
    size_t len;

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.uni_overflow = 0;
    n->mac_table.multi_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);

    /* Unicast list: count, then addresses. */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    if (s != sizeof(mac_data.entries)) {
        return VIRTIO_NET_ERR;
    }
    /* Decode only after the count is known to have been read in full. */
    mac_data.entries = ldl_p(&mac_data.entries);
    iov_discard_front(&iov, &iov_cnt, s);

    /* Compare by division so a huge count cannot wrap the byte length. */
    avail = iov_size(iov, iov_cnt);
    if (mac_data.entries > avail / ETH_ALEN) {
        return VIRTIO_NET_ERR;
    }
    len = (size_t)mac_data.entries * ETH_ALEN;

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, n->mac_table.macs, len);
        if (s != len) {
            return VIRTIO_NET_ERR;
        }
        n->mac_table.in_use += mac_data.entries;
    } else {
        n->mac_table.uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, len);

    n->mac_table.first_multi = n->mac_table.in_use;

    /* Multicast list: count, then addresses; must consume the rest. */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    if (s != sizeof(mac_data.entries)) {
        return VIRTIO_NET_ERR;
    }
    mac_data.entries = ldl_p(&mac_data.entries);

    iov_discard_front(&iov, &iov_cnt, s);

    avail = iov_size(iov, iov_cnt);
    if (avail % ETH_ALEN != 0 || mac_data.entries != avail / ETH_ALEN) {
        return VIRTIO_NET_ERR;
    }
    len = avail;

    if (n->mac_table.in_use + mac_data.entries <= MAC_TABLE_ENTRIES) {
        /*
         * Append after the unicast entries.  Writing at offset 0 would
         * overwrite them and leave the [first_multi, in_use) range that
         * receive_filter() scans for multicast filled with zeros.
         */
        s = iov_to_buf(iov, iov_cnt, 0,
                       &n->mac_table.macs[n->mac_table.in_use * ETH_ALEN],
                       len);
        if (s != len) {
            return VIRTIO_NET_ERR;
        }
        n->mac_table.in_use += mac_data.entries;
    } else {
        n->mac_table.multi_overflow = 1;
    }

    return VIRTIO_NET_OK;
}

509
/*
 * Handle a VIRTIO_NET_CTRL_VLAN command: read a 16-bit VLAN id from the
 * request and set (ADD) or clear (DEL) its bit in the n->vlans bitmap.
 * Returns VIRTIO_NET_ERR for a short request, an id >= MAX_VLAN or an
 * unknown command; VIRTIO_NET_OK otherwise.
 */
static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    uint16_t vid;
    size_t s;

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }
    /* Decode only once the id is known to have been read in full. */
    vid = lduw_p(&vid);

    if (vid >= MAX_VLAN) {
        return VIRTIO_NET_ERR;
    }

    /* One bit per VLAN id, 32 ids per bitmap word. */
    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) {
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    } else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) {
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    } else {
        return VIRTIO_NET_ERR;
    }

    return VIRTIO_NET_OK;
}

J
Jason Wang 已提交
534
/*
 * Handle a VIRTIO_NET_CTRL_MQ command.  Only VQ_PAIRS_SET is accepted,
 * and only when multiqueue is enabled and the requested pair count lies
 * within both the protocol limits and the device's max_queues.  On
 * success the new count is stored, the device status is re-applied and
 * the backend queues are attached/detached to match.
 */
static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    struct virtio_net_ctrl_mq mq;
    uint16_t queues;
    size_t got;

    got = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
    if (got != sizeof(mq) || cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        return VIRTIO_NET_ERR;
    }

    queues = lduw_p(&mq.virtqueue_pairs);

    if (!n->multiqueue ||
        queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queues > n->max_queues) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queues = queues;

    /* Re-evaluate per-queue state with the new count, then update the
     * backend so that queues beyond the count are detached. */
    virtio_net_set_status(&n->vdev, n->vdev.status);
    virtio_net_set_queues(n);

    return VIRTIO_NET_OK;
}
567 568
/*
 * Control virtqueue handler.  Each request carries a virtio_net_ctrl_hdr
 * (class + command) in its out buffers and room for a one-byte ack in its
 * in buffers.  The request is dispatched on its class and the resulting
 * status is written back and pushed to the guest.
 *
 * A request without room for both headers is treated as fatal.
 */
static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = to_virtio_net(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    VirtQueueElement elem;
    size_t s;
    struct iovec *iov;
    unsigned int iov_cnt;

    while (virtqueue_pop(vq, &elem)) {
        /*
         * Scoped to one request so that an unrecognised class is acked
         * with an error rather than with the previous request's result.
         */
        virtio_net_ctrl_ack status = VIRTIO_NET_ERR;

        if (iov_size(elem.in_sg, elem.in_num) < sizeof(status) ||
            iov_size(elem.out_sg, elem.out_num) < sizeof(ctrl)) {
            error_report("virtio-net ctrl missing headers");
            exit(1);
        }

        iov = elem.out_sg;
        iov_cnt = elem.out_num;
        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
        if (s != sizeof(ctrl)) {
            status = VIRTIO_NET_ERR;
        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
        }

        s = iov_from_buf(elem.in_sg, elem.in_num, 0, &status, sizeof(status));
        assert(s == sizeof(status));

        virtqueue_push(vq, &elem, sizeof(status));
        virtio_notify(vdev, vq);
    }
}

A
aliguori 已提交
608 609 610 611
/* RX */

/*
 * RX virtqueue handler: flush any packets queued for the sub-queue that
 * corresponds to 'vq'.
 */
static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = to_virtio_net(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));
    NetClientState *subqueue = qemu_get_subqueue(n->nic, queue_index);

    qemu_flush_queued_packets(subqueue);
}

618
/*
 * Return 1 when the queue behind 'nc' may accept a packet: the VM is
 * running, the queue index is below curr_queues, the RX virtqueue is
 * ready and the driver has set DRIVER_OK.  Return 0 otherwise.
 */
static int virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!n->vdev.vm_running) {
        return 0;
    }

    if (nc->queue_index >= n->curr_queues) {
        return 0;
    }

    if (!virtio_queue_ready(q->rx_vq)) {
        return 0;
    }

    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return 0;
    }

    return 1;
}

639
/*
 * Return 1 if the RX virtqueue has buffers for a packet of 'bufsize'
 * bytes, 0 otherwise.  With mergeable buffers the available byte count is
 * checked as well as emptiness.  Guest notifications are enabled while
 * the queue lacks buffers and disabled once buffers are found.
 */
static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    bool lacking;

    lacking = virtio_queue_empty(q->rx_vq) ||
              (n->mergeable_rx_bufs &&
               !virtqueue_avail_bytes(q->rx_vq, bufsize, 0));

    if (lacking) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* The guest may have added buffers after the first check but
         * before notification was switched on, so look once more.
         */
        lacking = virtio_queue_empty(q->rx_vq) ||
                  (n->mergeable_rx_bufs &&
                   !virtqueue_avail_bytes(q->rx_vq, bufsize, 0));
        if (lacking) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}

A
Anthony Liguori 已提交
662 663 664 665 666 667 668 669 670 671 672 673 674 675 676
/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
M
Michael S. Tsirkin 已提交
677
                                        uint8_t *buf, size_t size)
A
Anthony Liguori 已提交
678 679 680 681 682 683
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
M
Michael S. Tsirkin 已提交
684
        net_checksum_calculate(buf, size);
A
Anthony Liguori 已提交
685 686 687 688
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}

689 690
/*
 * Write the virtio-net header for an incoming packet into the guest
 * buffers 'iov'.  When the backend supplies a vnet header, the header at
 * the start of 'buf' is copied (after the dhclient checksum workaround
 * has been applied to the packet); otherwise a header with no flags and
 * GSO_NONE is written.
 */
static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (!n->has_vnet_hdr) {
        struct virtio_net_hdr blank = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };

        iov_from_buf(iov, iov_cnt, 0, &blank, sizeof blank);
        return;
    }

    {
        /* FIXME this cast is evil */
        uint8_t *wbuf = (uint8_t *)buf;

        work_around_broken_dhclient((struct virtio_net_hdr *)wbuf,
                                    wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    }
}

707 708 709
/*
 * Decide whether an incoming packet passes the receive filters.
 * Returns 1 to accept and 0 to drop.  Promiscuous mode accepts
 * everything; otherwise the VLAN bitmap is consulted for tagged frames,
 * followed by the broadcast/multicast or unicast rules and the MAC table.
 */
static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *frame = (uint8_t *)buf;
    int idx;

    if (n->promisc) {
        return 1;
    }

    /* Skip the host header that precedes the frame. */
    frame += n->host_hdr_len;

    if (!memcmp(&frame[12], vlan, sizeof(vlan))) {
        int vid = be16_to_cpup((uint16_t *)(frame + 14)) & 0xfff;

        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f)))) {
            return 0;
        }
    }

    if (frame[0] & 1) {
        /* multicast (group bit set), broadcast included */
        if (!memcmp(frame, bcast, sizeof(bcast))) {
            return !n->nobcast;
        }
        if (n->nomulti) {
            return 0;
        }
        if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        for (idx = n->mac_table.first_multi; idx < n->mac_table.in_use; idx++) {
            if (!memcmp(frame, &n->mac_table.macs[idx * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }

        return 0;
    }

    /* unicast */
    if (n->nouni) {
        return 0;
    }
    if (n->alluni || n->mac_table.uni_overflow) {
        return 1;
    }
    if (!memcmp(frame, n->mac, ETH_ALEN)) {
        return 1;
    }

    for (idx = 0; idx < n->mac_table.first_multi; idx++) {
        if (!memcmp(frame, &n->mac_table.macs[idx * ETH_ALEN], ETH_ALEN)) {
            return 1;
        }
    }

    return 0;
}

758
/*
 * Deliver one packet from the backend to the guest's RX virtqueue.
 *
 * Returns -1 when the queue cannot receive (or no buffer could be popped
 * for the first chunk), 0 when the guest has not provided enough buffers,
 * and 'size' when the packet was delivered, filtered out, or dropped for
 * not fitting a single non-mergeable buffer.
 */
static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size)) {
        return size;
    }

    i = 0;
    offset = 0;

    while (offset < size) {
        VirtQueueElement elem;
        int len;
        int total = 0;
        const struct iovec *sg = elem.in_sg;

        if (virtqueue_pop(q->rx_vq, &elem) == 0) {
            if (i == 0) {
                return -1;
            }
            error_report("virtio-net unexpected empty queue: "
                    "i %zd mergeable %d offset %zd, size %zd, "
                    "guest hdr len %zd, host hdr len %zd guest features 0x%x",
                    i, n->mergeable_rx_bufs, offset, size,
                    n->guest_hdr_len, n->host_hdr_len, n->vdev.guest_features);
            exit(1);
        }

        if (elem.in_num < 1) {
            error_report("virtio-net receive queue contains no in buffers");
            exit(1);
        }

        guest_offset = 0;
        if (i == 0) {
            /* First buffer: it also carries the virtio-net header. */
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                /* Remember where num_buffers lives so it can be patched
                 * once the number of buffers used is known. */
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem.in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem.in_num, buf, size);
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem.in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;

        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
#if 0
            error_report("virtio-net truncated non-mergeable packet: "
                         "i %zd mergeable %d offset %zd, size %zd, "
                         "guest hdr len %zd, host hdr len %zd",
                         i, n->mergeable_rx_bufs,
                         offset, size, n->guest_hdr_len, n->host_hdr_len);
#endif
            return size;
        }

        /* signal other side */
        virtqueue_fill(q->rx_vq, &elem, total, i);
        i++;
    }

    if (mhdr_cnt) {
        stw_p(&mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(&n->vdev, q->rx_vq);

    return size;
}

857
static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
858

859
/*
 * Completion callback for an asynchronous transmit: return the pending
 * element to the guest, clear the async state, re-enable TX
 * notifications and resume flushing the TX queue.
 */
static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    virtqueue_push(q->tx_vq, &q->async_tx.elem, 0);
    virtio_notify(&n->vdev, q->tx_vq);

    q->async_tx.len = 0;
    q->async_tx.elem.out_num = 0;

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}

A
aliguori 已提交
873
/* TX */
874
/*
 * Pop elements from q->tx_vq and send them on the NIC subqueue that
 * matches this virtqueue pair.
 *
 * Returns:
 *   the number of packets sent (the loop stops once n->tx_burst is
 *   reached);
 *   0 if the driver has not set DRIVER_OK, or if an earlier asynchronous
 *   send is still outstanding;
 *   -EBUSY if a send returned 0, in which case the element is parked in
 *   q->async_tx and notifications stay off until
 *   virtio_net_tx_complete() runs.
 */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtQueueElement elem;
    int32_t sent = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));

    if ((n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK) == 0) {
        return sent;
    }

    assert(n->vdev.vm_running);

    /* An element is still parked from a previous asynchronous send. */
    if (q->async_tx.elem.out_num != 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return sent;
    }

    while (virtqueue_pop(q->tx_vq, &elem)) {
        struct iovec sg[VIRTQUEUE_MAX_SIZE];
        struct iovec *out_sg = &elem.out_sg[0];
        unsigned int out_num = elem.out_num;
        ssize_t ret, len;

        if (out_num < 1) {
            error_report("virtio-net header not in first element");
            exit(1);
        }

        /*
         * When host and guest header lengths match, the guest buffers are
         * passed on untouched.  Otherwise build a new iovec in sg[] made
         * of the first host_hdr_len bytes followed by everything after
         * the guest header.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            unsigned sg_num;

            sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                              out_sg, out_num,
                              0, n->host_hdr_len);
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                               out_sg, out_num,
                               n->guest_hdr_len, -1);
            out_sg = sg;
            out_num = sg_num;
        }

        len = n->guest_hdr_len;

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            /* Park the element; virtio_net_tx_complete() finishes it. */
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            q->async_tx.len  = len;
            return -EBUSY;
        }

        len += ret;

        virtqueue_push(q->tx_vq, &elem, 0);
        virtio_notify(&n->vdev, q->tx_vq);

        sent++;
        if (sent >= n->tx_burst) {
            break;
        }
    }

    return sent;
}

942
/*
 * TX virtqueue handler used when the queue runs in "timer" mode.
 *
 * The first kick arms q->tx_timer and disables further notifications.
 * A kick that arrives while the timer is already armed cancels the timer
 * and flushes at once.
 */
static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = to_virtio_net(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    /* This happens when device was stopped but VCPU wasn't. */
    if (!n->vdev.vm_running) {
        q->tx_waiting = 1;
        return;
    }

    if (!q->tx_waiting) {
        /* Nothing pending yet: start the batching timer. */
        qemu_mod_timer(q->tx_timer,
                       qemu_get_clock_ns(vm_clock) + n->tx_timeout);
        q->tx_waiting = 1;
        virtio_queue_set_notification(vq, 0);
        return;
    }

    /* Timer already armed: cancel it and flush now. */
    virtio_queue_set_notification(vq, 1);
    qemu_del_timer(q->tx_timer);
    q->tx_waiting = 0;
    virtio_net_flush_tx(q);
}

966 967 968
/*
 * TX virtqueue handler used when the queue runs in "bh" mode: mark the
 * queue as waiting and schedule q->tx_bh with notifications disabled.
 */
static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = to_virtio_net(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    /* Already marked as waiting: nothing more to do. */
    if (unlikely(q->tx_waiting)) {
        return;
    }
    q->tx_waiting = 1;

    /* If the device was stopped but the VCPU wasn't, only the flag is
     * recorded; no bottom half is scheduled. */
    if (n->vdev.vm_running) {
        virtio_queue_set_notification(vq, 0);
        qemu_bh_schedule(q->tx_bh);
    }
}

A
aliguori 已提交
983 984
static void virtio_net_tx_timer(void *opaque)
{
985 986
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
987
    assert(n->vdev.vm_running);
A
aliguori 已提交
988

989
    q->tx_waiting = 0;
A
aliguori 已提交
990 991 992 993 994

    /* Just in case the driver is not ready on more */
    if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
        return;

995 996
    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
A
aliguori 已提交
997 998
}

999 1000
static void virtio_net_tx_bh(void *opaque)
{
1001 1002
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
1003 1004
    int32_t ret;

1005
    assert(n->vdev.vm_running);
1006

1007
    q->tx_waiting = 0;
1008 1009 1010 1011 1012

    /* Just in case the driver is not ready on more */
    if (unlikely(!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK)))
        return;

1013
    ret = virtio_net_flush_tx(q);
1014 1015 1016 1017 1018 1019 1020
    if (ret == -EBUSY) {
        return; /* Notification re-enable handled by tx_complete */
    }

    /* If we flush a full burst of packets, assume there are
     * more coming and immediately reschedule */
    if (ret >= n->tx_burst) {
1021 1022
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
1023 1024 1025 1026 1027 1028
        return;
    }

    /* If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking.  If
     * we find something, assume the guest is still active and reschedule */
1029 1030 1031 1032 1033
    virtio_queue_set_notification(q->tx_vq, 1);
    if (virtio_net_flush_tx(q) > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
1034 1035 1036
    }
}

J
Jason Wang 已提交
1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072
/*
 * Rebuild the virtqueues that follow the first RX/TX pair.
 *
 * n:          the device.
 * multiqueue: non-zero to create n->max_queues pairs, zero for one pair.
 * ctrl:       non-zero to re-add the control virtqueue afterwards.
 *
 * Every virtqueue from index 2 upwards is deleted first, then the
 * requested pairs (and optionally the control queue) are added again.
 */
static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue, int ctrl)
{
    VirtIODevice *vdev = &n->vdev;
    int i, max = multiqueue ? n->max_queues : 1;

    n->multiqueue = multiqueue;

    for (i = 2; i <= n->max_queues * 2 + 1; i++) {
        virtio_del_queue(vdev, i);
    }

    for (i = 1; i < max; i++) {
        n->vqs[i].rx_vq = virtio_add_queue(vdev, 256, virtio_net_handle_rx);
        /*
         * Pick the TX mode from queue 0, which is the only queue set up
         * at init time.  Testing n->vqs[i].tx_timer here would read a
         * field that is still zero the first time queue i is created,
         * so "tx=timer" would silently fall back to bottom halves for
         * every additional queue.
         */
        if (n->vqs[0].tx_timer) {
            n->vqs[i].tx_vq =
                virtio_add_queue(vdev, 256, virtio_net_handle_tx_timer);
            n->vqs[i].tx_timer = qemu_new_timer_ns(vm_clock,
                                                   virtio_net_tx_timer,
                                                   &n->vqs[i]);
        } else {
            n->vqs[i].tx_vq =
                virtio_add_queue(vdev, 256, virtio_net_handle_tx_bh);
            n->vqs[i].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[i]);
        }

        n->vqs[i].tx_waiting = 0;
        n->vqs[i].n = n;
    }

    if (ctrl) {
        n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
    }

    virtio_net_set_queues(n);
}

A
aliguori 已提交
1073 1074
/*
 * Savevm handler registered by virtio_net_common_init(): write the
 * device state to f.  virtio_net_load() reads the fields back in the
 * same order, so the sequence of puts below must not change.
 */
static void virtio_net_save(QEMUFile *f, void *opaque)
{
    VirtIONet *n = opaque;
    int q;

    /* At this point, backend must be stopped, otherwise
     * it might keep writing to memory. */
    assert(!n->vhost_started);
    virtio_save(&n->vdev, f);

    qemu_put_buffer(f, n->mac, ETH_ALEN);
    qemu_put_be32(f, n->vqs[0].tx_waiting);
    qemu_put_be32(f, n->mergeable_rx_bufs);
    qemu_put_be16(f, n->status);

    /* RX mode flags, first part. */
    qemu_put_byte(f, n->promisc);
    qemu_put_byte(f, n->allmulti);

    /* MAC filter table followed by the VLAN bitmap. */
    qemu_put_be32(f, n->mac_table.in_use);
    qemu_put_buffer(f, n->mac_table.macs, n->mac_table.in_use * ETH_ALEN);
    qemu_put_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);

    qemu_put_be32(f, n->has_vnet_hdr);
    qemu_put_byte(f, n->mac_table.multi_overflow);
    qemu_put_byte(f, n->mac_table.uni_overflow);

    /* RX mode flags, second part. */
    qemu_put_byte(f, n->alluni);
    qemu_put_byte(f, n->nomulti);
    qemu_put_byte(f, n->nouni);
    qemu_put_byte(f, n->nobcast);

    qemu_put_byte(f, n->has_ufo);

    /* The multiqueue section is only written for multiqueue devices. */
    if (n->max_queues <= 1) {
        return;
    }
    qemu_put_be16(f, n->max_queues);
    qemu_put_be16(f, n->curr_queues);
    for (q = 1; q < n->curr_queues; q++) {
        qemu_put_be32(f, n->vqs[q].tx_waiting);
    }
}

/*
 * Loadvm handler: restore the state written by virtio_net_save().
 *
 * f:          the migration stream.
 * opaque:     the VirtIONet being restored.
 * version_id: version of the saved section; versions 2 up to
 *             VIRTIO_NET_VM_VERSION are accepted and older layouts are
 *             handled by the version checks below.
 *
 * Returns 0 on success, -EINVAL for an unsupported version, the error
 * from virtio_load(), or -1 when the stream is inconsistent with this
 * device or needs a peer feature that is missing.
 *
 * The stream is untrusted: every count read from it is validated before
 * it is used as a buffer size or array bound.
 */
static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    int ret, i, link_down;

    if (version_id < 2 || version_id > VIRTIO_NET_VM_VERSION) {
        return -EINVAL;
    }

    ret = virtio_load(&n->vdev, f);
    if (ret) {
        return ret;
    }

    qemu_get_buffer(f, n->mac, ETH_ALEN);
    n->vqs[0].tx_waiting = qemu_get_be32(f);

    virtio_net_set_mrg_rx_bufs(n, qemu_get_be32(f));

    if (version_id >= 3) {
        n->status = qemu_get_be16(f);
    }

    if (version_id >= 4) {
        /* Before version 8 these flags were stored as 32-bit values. */
        if (version_id < 8) {
            n->promisc = qemu_get_be32(f);
            n->allmulti = qemu_get_be32(f);
        } else {
            n->promisc = qemu_get_byte(f);
            n->allmulti = qemu_get_byte(f);
        }
    }

    if (version_id >= 5) {
        /*
         * Hold the count in an unsigned local so that a value with the
         * top bit set cannot pass the bound check as a negative number.
         */
        uint32_t in_use = qemu_get_be32(f);

        /* MAC_TABLE_ENTRIES may be different from the saved image */
        if (in_use <= MAC_TABLE_ENTRIES) {
            n->mac_table.in_use = in_use;
            qemu_get_buffer(f, n->mac_table.macs, in_use * ETH_ALEN);
        } else {
            /*
             * The saved table does not fit.  Consume its entries one at
             * a time through a fixed-size scratch buffer (no allocation
             * sized by the stream, no size multiplication), then fall
             * back to the overflow flags with an empty table.
             */
            uint8_t scratch[ETH_ALEN];
            uint32_t left;

            for (left = in_use; left > 0; left--) {
                qemu_get_buffer(f, scratch, ETH_ALEN);
            }
            n->mac_table.multi_overflow = n->mac_table.uni_overflow = 1;
            n->mac_table.in_use = 0;
        }
    }

    if (version_id >= 6) {
        qemu_get_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);
    }

    if (version_id >= 7) {
        if (qemu_get_be32(f) && !peer_has_vnet_hdr(n)) {
            error_report("virtio-net: saved image requires vnet_hdr=on");
            return -1;
        }

        if (n->has_vnet_hdr) {
            tap_set_offload(qemu_get_queue(n->nic)->peer,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_TSO6) & 1,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_ECN)  & 1,
                    (n->vdev.guest_features >> VIRTIO_NET_F_GUEST_UFO)  & 1);
        }
    }

    if (version_id >= 9) {
        n->mac_table.multi_overflow = qemu_get_byte(f);
        n->mac_table.uni_overflow = qemu_get_byte(f);
    }

    if (version_id >= 10) {
        n->alluni = qemu_get_byte(f);
        n->nomulti = qemu_get_byte(f);
        n->nouni = qemu_get_byte(f);
        n->nobcast = qemu_get_byte(f);
    }

    if (version_id >= 11) {
        if (qemu_get_byte(f) && !peer_has_ufo(n)) {
            error_report("virtio-net: saved image requires TUN_F_UFO support");
            return -1;
        }
    }

    if (n->max_queues > 1) {
        int curr_queues;

        if (n->max_queues != qemu_get_be16(f)) {
            error_report("virtio-net: different max_queues ");
            return -1;
        }

        /*
         * n->vqs[] has max_queues entries; refuse a larger count before
         * it is used as the loop bound below.
         */
        curr_queues = qemu_get_be16(f);
        if (curr_queues > n->max_queues) {
            error_report("virtio-net: curr_queues %d > max_queues %d",
                         curr_queues, (int)n->max_queues);
            return -1;
        }
        n->curr_queues = curr_queues;

        for (i = 1; i < n->curr_queues; i++) {
            n->vqs[i].tx_waiting = qemu_get_be32(f);
        }
    }

    virtio_net_set_queues(n);

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in n->status */
    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
    for (i = 0; i < n->max_queues; i++) {
        qemu_get_subqueue(n->nic, i)->link_down = link_down;
    }

    return 0;
}

1225
/* NetClientInfo.cleanup hook: clear the device's reference to its NIC. */
static void virtio_net_cleanup(NetClientState *nc)
{
    VirtIONet *dev = qemu_get_nic_opaque(nc);

    dev->nic = NULL;
}

M
Mark McLoughlin 已提交
1232
static NetClientInfo net_virtio_info = {
1233
    .type = NET_CLIENT_OPTIONS_KIND_NIC,
M
Mark McLoughlin 已提交
1234 1235 1236 1237 1238 1239 1240
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
        .cleanup = virtio_net_cleanup,
    .link_status_changed = virtio_net_set_link_status,
};

1241 1242 1243
/*
 * guest_notifier_pending callback: forward the query for virtqueue idx
 * to the vhost-net instance behind the matching subqueue's peer.
 * Asserts that vhost has been started.
 */
static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
    VirtIONet *dev = to_virtio_net(vdev);
    NetClientState *subq = qemu_get_subqueue(dev->nic, vq2q(idx));

    assert(dev->vhost_started);

    return vhost_net_virtqueue_pending(tap_get_vhost_net(subq->peer), idx);
}

/*
 * guest_notifier_mask callback: forward the mask request for virtqueue
 * idx to the vhost-net instance behind the matching subqueue's peer.
 * Asserts that vhost has been started.
 */
static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
                                           bool mask)
{
    VirtIONet *dev = to_virtio_net(vdev);
    NetClientState *subq = qemu_get_subqueue(dev->nic, vq2q(idx));

    assert(dev->vhost_started);

    vhost_net_virtqueue_mask(tap_get_vhost_net(subq->peer), vdev, idx, mask);
}

1259
/*
 * Set n->config_size from host_features: the result is the largest
 * 'end' offset among the feature_sizes[] entries whose flag is present
 * in host_features, or 0 when none is.
 */
void virtio_net_set_config_size(VirtIONet *n, uint32_t host_features)
{
    const VirtIOFeature *feat;
    int config_size = 0;

    /* feature_sizes[] is terminated by an entry with flags == 0. */
    for (feat = feature_sizes; feat->flags != 0; feat++) {
        if (host_features & feat->flags) {
            config_size = MAX(feat->end, config_size);
        }
    }

    n->config_size = config_size;
}

/*
 * Initialization shared by both ways of creating the device.
 *
 * dev:           the qdev device.
 * conf:          NIC configuration (MAC address, queues, boot index).
 * net:           virtio-net options (tx mode, txtimer, txburst).
 * host_features: only used when *pn is NULL, to compute the config size.
 * pn:            *pn == NULL means the old virtio-net-pci path, where the
 *                VirtIONet is allocated here; otherwise *pn is the
 *                already-allocated refactored device.  *pn is read but
 *                never written.
 *
 * Returns the embedded VirtIODevice.
 */
static VirtIODevice *virtio_net_common_init(DeviceState *dev, NICConf *conf,
                                            virtio_net_conf *net,
                                            uint32_t host_features,
                                            VirtIONet **pn)
{
    VirtIONet *n = *pn;
    const char *tx_mode = net->tx;
    int i, config_size = 0;

    if (n != NULL) {
        /* virtio-net: the object exists and carries its config size. */
        virtio_init(VIRTIO_DEVICE(n), "virtio-net", VIRTIO_ID_NET,
                    n->config_size);
    } else {
        /* virtio-net-pci: size the config space from host_features. */
        for (i = 0; feature_sizes[i].flags != 0; i++) {
            if (host_features & feature_sizes[i].flags) {
                config_size = MAX(feature_sizes[i].end, config_size);
            }
        }
        n = (VirtIONet *)virtio_common_init("virtio-net", VIRTIO_ID_NET,
                                            config_size, sizeof(VirtIONet));
        n->config_size = config_size;
    }

    /* VirtIODevice callbacks. */
    n->vdev.get_config = virtio_net_get_config;
    n->vdev.set_config = virtio_net_set_config;
    n->vdev.get_features = virtio_net_get_features;
    n->vdev.set_features = virtio_net_set_features;
    n->vdev.bad_features = virtio_net_bad_features;
    n->vdev.reset = virtio_net_reset;
    n->vdev.set_status = virtio_net_set_status;
    n->vdev.guest_notifier_mask = virtio_net_guest_notifier_mask;
    n->vdev.guest_notifier_pending = virtio_net_guest_notifier_pending;

    /* Per-queue state is zero-filled; only queue 0 is set up here. */
    n->max_queues = MAX(conf->queues, 1);
    n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
    n->vqs[0].rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);
    n->curr_queues = 1;
    n->vqs[0].n = n;
    n->tx_timeout = net->txtimer;

    if (tx_mode && strcmp(tx_mode, "timer") != 0
        && strcmp(tx_mode, "bh") != 0) {
        error_report("virtio-net: "
                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                     tx_mode);
        error_report("Defaulting to \"bh\"");
    }

    /* Anything other than tx=timer gets the bottom-half TX path. */
    if (tx_mode && strcmp(tx_mode, "timer") == 0) {
        n->vqs[0].tx_vq = virtio_add_queue(&n->vdev, 256,
                                           virtio_net_handle_tx_timer);
        n->vqs[0].tx_timer = qemu_new_timer_ns(vm_clock, virtio_net_tx_timer,
                                               &n->vqs[0]);
    } else {
        n->vqs[0].tx_vq = virtio_add_queue(&n->vdev, 256,
                                           virtio_net_handle_tx_bh);
        n->vqs[0].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[0]);
    }
    n->ctrl_vq = virtio_add_queue(&n->vdev, 64, virtio_net_handle_ctrl);

    qemu_macaddr_default_if_unset(&conf->macaddr);
    memcpy(&n->mac[0], &conf->macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;

    n->nic = qemu_new_nic(&net_virtio_info, conf,
                          object_get_typename(OBJECT(dev)), dev->id, n);

    /* Enable the vnet header on every subqueue's peer when available. */
    peer_test_vnet_hdr(n);
    if (!peer_has_vnet_hdr(n)) {
        n->host_hdr_len = 0;
    } else {
        for (i = 0; i < n->max_queues; i++) {
            tap_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
        }
        n->host_hdr_len = sizeof(struct virtio_net_hdr);
    }

    qemu_format_nic_info_str(qemu_get_queue(n->nic), conf->macaddr.a);

    n->vqs[0].tx_waiting = 0;
    n->tx_burst = net->txburst;
    virtio_net_set_mrg_rx_bufs(n, 0);
    n->promisc = 1; /* for compatibility */

    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    n->vlans = g_malloc0(MAX_VLAN >> 3);

    n->qdev = dev;
    register_savevm(dev, "virtio-net", -1, VIRTIO_NET_VM_VERSION,
                    virtio_net_save, virtio_net_load, n);

    add_boot_device_path(conf->bootindex, dev, "/ethernet-phy@0");

    return &n->vdev;
}
1366

1367 1368 1369 1370 1371 1372 1373
/*
 * Entry point for the old virtio-net-pci path: pass a NULL VirtIONet so
 * that virtio_net_common_init() allocates the device itself.
 */
VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
                              virtio_net_conf *net, uint32_t host_features)
{
    VirtIONet *unallocated = NULL;

    return virtio_net_common_init(dev, conf, net, host_features,
                                  &unallocated);
}

1374 1375 1376
/*
 * Tear down a device created through virtio_net_init(): stop the
 * backend, unregister the savevm handlers and release the filter tables,
 * the per-queue TX timers / bottom halves, the queue array and the NIC.
 */
void virtio_net_exit(VirtIODevice *vdev)
{
    VirtIONet *n = DO_UPCAST(VirtIONet, vdev, vdev);
    int i;

    /* This will stop vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    unregister_savevm(n->qdev, "virtio-net", n);

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    for (i = 0; i < n->max_queues; i++) {
        VirtIONetQueue *q = &n->vqs[i];
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        qemu_purge_queued_packets(nc);

        /*
         * Queues past the first are only given a timer or bottom half
         * once multiqueue is enabled; until then both pointers are still
         * NULL from g_malloc0(), so only delete what actually exists.
         */
        if (q->tx_timer) {
            qemu_del_timer(q->tx_timer);
            qemu_free_timer(q->tx_timer);
        } else if (q->tx_bh) {
            qemu_bh_delete(q->tx_bh);
        }
    }

    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_cleanup(&n->vdev);
}
1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508

/*
 * VirtioDeviceClass.init hook for the refactored virtio-net device.
 * Returns 0 on success and -1 if the common initialization returned NULL.
 */
static int virtio_net_device_init(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    DeviceState *qdev = DEVICE(vdev);
    VirtIODevice *inited;

    /*
     * host_features is not available here, so 0 is passed; the config
     * size comes from n->config_size, which starts out as
     * sizeof(struct virtio_net_config).
     */
    inited = virtio_net_common_init(qdev, &(n->nic_conf),
                                    &(n->net_conf), 0, &n);

    return inited == NULL ? -1 : 0;
}

/*
 * DeviceClass.exit hook for the refactored virtio-net device: stop the
 * backend, unregister the savevm handlers and release the filter tables,
 * the per-queue TX timers / bottom halves, the queue array and the NIC.
 * Always returns 0.
 */
static int virtio_net_device_exit(DeviceState *qdev)
{
    VirtIONet *n = VIRTIO_NET(qdev);
    VirtIODevice *vdev = VIRTIO_DEVICE(qdev);
    int i;

    /* This will stop vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    unregister_savevm(qdev, "virtio-net", n);

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    for (i = 0; i < n->max_queues; i++) {
        VirtIONetQueue *q = &n->vqs[i];
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        qemu_purge_queued_packets(nc);

        /*
         * Queues past the first are only given a timer or bottom half
         * once multiqueue is enabled; until then both pointers are still
         * NULL from g_malloc0(), so only delete what actually exists.
         */
        if (q->tx_timer) {
            qemu_del_timer(q->tx_timer);
            qemu_free_timer(q->tx_timer);
        } else if (q->tx_bh) {
            qemu_bh_delete(q->tx_bh);
        }
    }

    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_common_cleanup(&n->vdev);

    return 0;
}

/*
 * QOM instance_init: start with the full config size,
 * sizeof(struct virtio_net_config).  virtio_net_set_config_size() can
 * override it later.
 */
static void virtio_net_instance_init(Object *obj)
{
    VirtIONet *dev = VIRTIO_NET(obj);

    dev->config_size = sizeof(struct virtio_net_config);
}

/* qdev properties of the virtio-net device. */
static Property virtio_net_properties[] = {
    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
    /* TX batching knobs, stored in net_conf. */
    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
                       TX_TIMER_INTERVAL),
    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst,
                      TX_BURST),
    /* TX mode; virtio_net_common_init() recognizes "timer" and "bh". */
    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
    DEFINE_PROP_END_OF_LIST(),
};

/* QOM class_init: install the qdev and virtio callbacks for virtio-net. */
static void virtio_net_class_init(ObjectClass *klass, void *data)
{
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
    DeviceClass *dc = DEVICE_CLASS(klass);

    /* Generic device hooks. */
    dc->props = virtio_net_properties;
    dc->exit = virtio_net_device_exit;

    /* Virtio device hooks. */
    vdc->init = virtio_net_device_init;
    vdc->reset = virtio_net_reset;
    vdc->set_status = virtio_net_set_status;
    vdc->get_config = virtio_net_get_config;
    vdc->set_config = virtio_net_set_config;
    vdc->get_features = virtio_net_get_features;
    vdc->set_features = virtio_net_set_features;
    vdc->bad_features = virtio_net_bad_features;
    vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
    vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
}

/* QOM type description of the virtio-net device. */
static const TypeInfo virtio_net_info = {
    .name          = TYPE_VIRTIO_NET,
    .parent        = TYPE_VIRTIO_DEVICE,
    .instance_size = sizeof(VirtIONet),
    .instance_init = virtio_net_instance_init,
    .class_init    = virtio_net_class_init,
};

/* Register the virtio-net QOM type; hooked up through type_init(). */
static void virtio_register_types(void)
{
    type_register_static(&virtio_net_info);
}

type_init(virtio_register_types)