virtio-net.c 60.3 KB
Newer Older
A
aliguori 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

P
Peter Maydell 已提交
14
#include "qemu/osdep.h"
15
#include "qemu/iov.h"
P
Paolo Bonzini 已提交
16
#include "hw/virtio/virtio.h"
P
Paolo Bonzini 已提交
17
#include "net/net.h"
18
#include "net/checksum.h"
19
#include "net/tap.h"
20 21
#include "qemu/error-report.h"
#include "qemu/timer.h"
P
Paolo Bonzini 已提交
22 23
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
24
#include "hw/virtio/virtio-bus.h"
25
#include "qapi/qmp/qjson.h"
26
#include "qapi-event.h"
27
#include "hw/virtio/virtio-access.h"
A
aliguori 已提交
28

29
#define VIRTIO_NET_VM_VERSION    11
30

31
#define MAC_TABLE_ENTRIES    64
32
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
33

34 35 36 37 38
/* previously fixed value */
#define VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE 256
/* for now, only allow larger queues; with virtio-1, guest can downsize */
#define VIRTIO_NET_RX_QUEUE_MIN_SIZE VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE

39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
/*
 * Calculate the number of bytes up to and including the given 'field' of
 * 'container', i.e. the offset of the field plus the size of the field.
 *
 * The null-pointer expression is only used as the operand of sizeof, so it
 * is never evaluated.
 */
#define endof(container, field) \
    (offsetof(container, field) + sizeof(((container *)0)->field))

/*
 * Associates a set of feature bits with a size: the entries of
 * feature_sizes[] pair a feature flag with the end offset of a config field.
 */
typedef struct VirtIOFeature {
    uint32_t flags;     /* feature bit mask (1 << VIRTIO_NET_F_xxx) */
    size_t end;         /* byte count as produced by endof() */
} VirtIOFeature;

/*
 * For each listed feature, the end offset of the struct virtio_net_config
 * field that goes with it.  The table is terminated by an all-zero entry.
 * NOTE(review): presumably consumed when computing n->config_size; the
 * consumer is not visible in this part of the file.
 */
static VirtIOFeature feature_sizes[] = {
    {.flags = 1 << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1 << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1 << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {}
};

J
Jason Wang 已提交
61
/*
 * Return the VirtIONetQueue that belongs to the given net client.
 *
 * The owning VirtIONet is obtained from the NIC opaque pointer; the client's
 * queue_index selects the entry in its vqs[] array.
 */
static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    return &n->vqs[nc->queue_index];
}
J
Jason Wang 已提交
67 68 69 70 71 72

/*
 * Convert a virtqueue index into a queue index by integer division by two
 * (presumably two virtqueues per queue pair - confirm against the callers).
 */
static int vq2q(int queue_index)
{
    const int vqs_per_queue = 2;

    return queue_index / vqs_per_queue;
}

A
aliguori 已提交
73 74 75 76
/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

77
/*
 * Fill 'config' with the device configuration space.
 *
 * status and max_virtqueue_pairs are stored with virtio_stw_p(), the MAC is
 * copied verbatim, and the first n->config_size bytes of the resulting
 * structure are copied out to 'config'.
 */
static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    /*
     * Zero-initialised (as in virtio_net_set_config()) so that any byte of
     * the structure that is not explicitly filled in below is copied out as
     * zero rather than as stale stack contents.
     */
    struct virtio_net_config netcfg = {};

    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    memcpy(config, &netcfg, n->config_size);
}

88 89
/*
 * Handle a write to the device configuration space.
 *
 * Only the MAC address is taken from 'config', and only when neither
 * VIRTIO_NET_F_CTRL_MAC_ADDR nor VIRTIO_F_VERSION_1 has been negotiated and
 * the written address differs from the current one.
 */
static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config netcfg = {};

    memcpy(&netcfg, config, n->config_size);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        memcmp(netcfg.mac, n->mac, ETH_ALEN)) {
        memcpy(n->mac, netcfg.mac, ETH_ALEN);
        /* Keep the NIC info string in sync with the new address. */
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    }
}

103 104
/*
 * Tell whether the device counts as started for the given device 'status':
 * the driver has set DRIVER_OK, the link is up and the VM is running.
 */
static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    return (status & VIRTIO_CONFIG_S_DRIVER_OK) &&
        (n->status & VIRTIO_NET_S_LINK_UP) && vdev->vm_running;
}

J
Jason Wang 已提交
110 111 112 113 114 115 116 117 118 119
/*
 * Announce timer callback: consume one announce round, raise the ANNOUNCE
 * bit in the net status and send a config change notification.
 *
 * 'opaque' is the VirtIONet the timer belongs to.
 */
static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;

    n->status |= VIRTIO_NET_S_ANNOUNCE;
    n->announce_counter--;
    virtio_notify_config(VIRTIO_DEVICE(n));
}

120
/*
 * Start or stop the vhost backend so that it matches the state implied by
 * 'status'.
 *
 * Nothing happens when the peer has no vhost backend or when vhost is
 * already in the wanted state.  vhost is wanted when the device counts as
 * started (virtio_net_started()) and the peer link is not down.
 */
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    if ((virtio_net_started(n, status) && !nc->peer->link_down) ==
        !!n->vhost_started) {
        return;
    }
    if (!n->vhost_started) {
        int r, i;

        /* vhost is not started while the vnet headers still need swapping. */
        if (n->needs_vnet_hdr_swap) {
            error_report("backend does not support %s vnet headers; "
                         "falling back on userspace virtio",
                         virtio_is_big_endian(vdev) ? "BE" : "LE");
            return;
        }

        /* Any packets outstanding? Purge them to avoid touching rings
         * when vhost is running.
         */
        for (i = 0;  i < queues; i++) {
            NetClientState *qnc = qemu_get_subqueue(n->nic, i);

            /* Purge both directions: TX and RX. */
            qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
            qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
        }

        /* Set before the start attempt and cleared again if it fails. */
        n->vhost_started = 1;
        r = vhost_net_start(vdev, n->nic->ncs, queues);
        if (r < 0) {
            error_report("unable to start vhost net: %d: "
                         "falling back on userspace virtio", -r);
            n->vhost_started = 0;
        }
    } else {
        vhost_net_stop(vdev, n->nic->ncs, queues);
        n->vhost_started = 0;
    }
}

168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220
/*
 * Enable or disable, on a single peer, the vnet header endianness that
 * matches the device: big endian when virtio_is_big_endian() says so,
 * little endian otherwise.  Returns the result of the backend call.
 */
static int virtio_net_set_vnet_endian_one(VirtIODevice *vdev,
                                          NetClientState *peer,
                                          bool enable)
{
    return virtio_is_big_endian(vdev) ? qemu_set_vnet_be(peer, enable)
                                      : qemu_set_vnet_le(peer, enable);
}

/*
 * Apply the vnet header endianness setting to the peers of the first
 * 'queues' entries of 'ncs'.
 *
 * Returns true when enabling failed on some peer; in that case the peers
 * that were already switched are switched back.  Returns false otherwise;
 * failures while disabling are ignored.
 */
static bool virtio_net_set_vnet_endian(VirtIODevice *vdev, NetClientState *ncs,
                                       int queues, bool enable)
{
    int q, undo;

    for (q = 0; q < queues; q++) {
        int ret = virtio_net_set_vnet_endian_one(vdev, ncs[q].peer, enable);

        if (ret >= 0 || !enable) {
            continue;
        }

        /* Roll back the peers handled before the failing one. */
        for (undo = q - 1; undo >= 0; undo--) {
            virtio_net_set_vnet_endian_one(vdev, ncs[undo].peer, false);
        }
        return true;
    }

    return false;
}

/*
 * Update the backend's vnet header endianness for a transition to 'status'.
 *
 * When the device is started under the new status the endianness is
 * enabled on the backend and n->needs_vnet_hdr_swap records whether that
 * failed.  When it is not started under the new status but is started under
 * the current vdev->status, the backend setting is disabled again.
 */
static void virtio_net_vnet_endian_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int queues = n->multiqueue ? n->max_queues : 1;

    if (virtio_net_started(n, status)) {
        /* Before using the device, we tell the network backend about the
         * endianness to use when parsing vnet headers. If the backend
         * can't do it, we fallback onto fixing the headers in the core
         * virtio-net code.
         */
        n->needs_vnet_hdr_swap = virtio_net_set_vnet_endian(vdev, n->nic->ncs,
                                                            queues, true);
    } else if (virtio_net_started(n, vdev->status)) {
        /* After using the device, we need to reset the network backend to
         * the default (guest native endianness), otherwise the guest may
         * lose network connectivity if it is rebooted into a different
         * endianness.
         */
        virtio_net_set_vnet_endian(vdev, n->nic->ncs, queues, false);
    }
}

221 222 223 224 225 226 227 228
/*
 * Drop everything queued on 'vq' and notify the guest, but only if
 * something was actually dropped.
 */
static void virtio_net_drop_tx_queue_data(VirtIODevice *vdev, VirtQueue *vq)
{
    if (virtqueue_drop_all(vq) == 0) {
        return;
    }

    virtio_notify(vdev, vq);
}

229 230
static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
231
    VirtIONet *n = VIRTIO_NET(vdev);
J
Jason Wang 已提交
232 233 234
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;
235

236
    virtio_net_vnet_endian_status(n, status);
237 238
    virtio_net_vhost_status(n, status);

J
Jason Wang 已提交
239
    for (i = 0; i < n->max_queues; i++) {
240 241
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
J
Jason Wang 已提交
242
        q = &n->vqs[i];
243

J
Jason Wang 已提交
244 245
        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
            queue_status = 0;
246
        } else {
J
Jason Wang 已提交
247
            queue_status = status;
248
        }
249 250 251 252 253 254
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }
J
Jason Wang 已提交
255 256 257 258 259

        if (!q->tx_waiting) {
            continue;
        }

260
        if (queue_started) {
J
Jason Wang 已提交
261
            if (q->tx_timer) {
262 263
                timer_mod(q->tx_timer,
                               qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
J
Jason Wang 已提交
264 265 266
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
267
        } else {
J
Jason Wang 已提交
268
            if (q->tx_timer) {
269
                timer_del(q->tx_timer);
J
Jason Wang 已提交
270 271 272
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
273 274 275 276 277 278 279 280
            if ((n->status & VIRTIO_NET_S_LINK_UP) == 0 &&
                (queue_status & VIRTIO_CONFIG_S_DRIVER_OK)) {
                /* if tx is waiting we are likely have some packets in tx queue
                 * and disabled notification */
                q->tx_waiting = 0;
                virtio_queue_set_notification(q->tx_vq, 1);
                virtio_net_drop_tx_queue_data(vdev, q->tx_vq);
            }
281 282 283 284
        }
    }
}

285
/*
 * Mirror the net client's link state into the LINK_UP bit of the net status,
 * notify the guest of a config change if the status changed, and re-run the
 * status handling with the current device status.
 */
static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t old_status = n->status;

    if (nc->link_down) {
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    } else {
        n->status |= VIRTIO_NET_S_LINK_UP;
    }

    if (n->status != old_status) {
        virtio_notify_config(vdev);
    }

    virtio_net_set_status(vdev, vdev->status);
}

302 303 304 305 306
/*
 * Emit a NIC_RX_FILTER_CHANGED event for the device behind 'nc', if event
 * notification is currently enabled for that client.
 *
 * Notification is switched off after one event; it is switched on again by
 * virtio_net_query_rxfilter().
 */
static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);

    if (nc->rxfilter_notify_enabled) {
        gchar *path = object_get_canonical_path(OBJECT(n->qdev));
        qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
                                              n->netclient_name, path, &error_abort);
        g_free(path);

        /* disable event notification to avoid events flooding */
        nc->rxfilter_notify_enabled = 0;
    }
}

317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336
/*
 * Build a newly allocated intList holding every VLAN id whose bit is set in
 * n->vlans.  Entries are prepended, so the list comes out in descending id
 * order.  Returns NULL when no bit is set.
 */
static intList *get_vlan_table(VirtIONet *n)
{
    intList *head = NULL;
    intList *node;
    int word, bit;

    for (word = 0; word < MAX_VLAN >> 5; word++) {
        /* Nothing to scan in an all-zero word. */
        if (!n->vlans[word]) {
            continue;
        }

        for (bit = 0; bit <= 0x1f; bit++) {
            if (!(n->vlans[word] & (1U << bit))) {
                continue;
            }

            node = g_malloc0(sizeof(*node));
            node->value = (word << 5) + bit;
            node->next = head;
            head = node;
        }
    }

    return head;
}

337 338 339
/*
 * Build an RxFilterInfo describing the current receive filter state of the
 * device behind 'nc'.
 *
 * The returned structure and everything it points to are newly allocated.
 * As a side effect, rx-filter change events are enabled again for 'nc'.
 */
static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list, *entry;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    /*
     * nobcast is set by the VIRTIO_NET_CTRL_RX_NOBCAST command, so broadcast
     * is allowed exactly when the flag is clear.
     */
    info->broadcast_allowed = !n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    /* Table entries below first_multi are reported as unicast addresses. */
    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->unicast_table = str_list;

    /* The remaining in-use entries are reported as multicast addresses. */
    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

404 405
/*
 * Device reset handler: restore the receive mode flags, queue count,
 * announce state, MAC/VLAN filter tables and MAC address to their initial
 * values.
 */
static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;
    /* multiqueue is disabled by default */
    n->curr_queues = 1;
    timer_del(n->announce_timer);
    n->announce_counter = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    /* Go back to the MAC address from the NIC configuration. */
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
    memset(n->vlans, 0, MAX_VLAN >> 3);
}

432
/*
 * Record in n->has_vnet_hdr whether the peer of the first queue supports
 * vnet headers.  The field is left untouched when there is no peer.
 */
static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);

    if (!nc->peer) {
        return;
    }

    n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
}
M
Mark McLoughlin 已提交
441

442 443
/*
 * Return the cached n->has_vnet_hdr value (set by peer_test_vnet_hdr()).
 */
static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

447 448 449 450 451
/*
 * Query whether the peer supports UFO and cache the answer in n->has_ufo.
 *
 * Returns 0 without querying (and without touching n->has_ufo) when the
 * peer has no vnet header support.
 */
static int peer_has_ufo(VirtIONet *n)
{
    if (!peer_has_vnet_hdr(n)) {
        return 0;
    }

    n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);

    return n->has_ufo;
}

457 458
/*
 * Record whether mergeable RX buffers are in use and derive the guest
 * header length from it.
 *
 * With version_1 the guest header is always struct virtio_net_hdr_mrg_rxbuf;
 * otherwise that layout is used only when mergeable_rx_bufs is set.  For
 * every queue whose peer accepts that header length, the length is set on
 * the peer and n->host_hdr_len is made equal to the guest header length.
 */
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1)
{
    int i;
    NetClientState *nc;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1) {
        n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
    } else {
        n->guest_hdr_len = n->mergeable_rx_bufs ?
            sizeof(struct virtio_net_hdr_mrg_rxbuf) :
            sizeof(struct virtio_net_hdr);
    }

    for (i = 0; i < n->max_queues; i++) {
        nc = qemu_get_subqueue(n->nic, i);

        if (peer_has_vnet_hdr(n) &&
            qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
            n->host_hdr_len = n->guest_hdr_len;
        }
    }
}

J
Jason Wang 已提交
484 485 486 487 488 489 490 491
/*
 * Enable the backend queue behind subqueue 'index'.
 *
 * A vhost-user peer gets its vring enabled (the result of that call is not
 * checked here); a tap peer is enabled through tap_enable() and its result
 * is returned.  Returns 0 when there is no peer or the peer is not a tap.
 */
static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 1);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_enable(nc->peer);
}

/*
 * Disable the backend queue behind subqueue 'index'.
 *
 * A vhost-user peer gets its vring disabled (the result of that call is not
 * checked here); a tap peer is disabled through tap_disable() and its result
 * is returned.  Returns 0 when there is no peer or the peer is not a tap.
 */
static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    if (nc->peer->info->type == NET_CLIENT_DRIVER_VHOST_USER) {
        vhost_set_vring_enable(nc->peer, 0);
    }

    if (nc->peer->info->type != NET_CLIENT_DRIVER_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

/*
 * Attach the peers of the first n->curr_queues queues and detach the peers
 * of the remaining ones.  Does nothing once the peer has been deleted.
 *
 * Attach/detach failures are treated as fatal via assert().
 */
static void virtio_net_set_queues(VirtIONet *n)
{
    int i;
    int r;

    if (n->nic->peer_deleted) {
        return;
    }

    for (i = 0; i < n->max_queues; i++) {
        /* The call is kept outside assert() so it still runs with NDEBUG. */
        if (i < n->curr_queues) {
            r = peer_attach(n, i);
            assert(!r);
        } else {
            r = peer_detach(n, i);
            assert(!r);
        }
    }
}

J
Jason Wang 已提交
542
static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
J
Jason Wang 已提交
543

J
Jason Wang 已提交
544 545
/*
 * Compute the feature bits offered to the guest.
 *
 * Starts from 'features' plus the device's host_features and
 * VIRTIO_NET_F_MAC, removes the checksum/segmentation offloads the peer
 * cannot provide, and finally lets a vhost backend, if any, filter the
 * result.  'errp' is not used by this implementation.
 */
static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);

    /* Firstly sync all virtio-net possible supported features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    /* These offloads are cleared when the peer has no vnet header. */
    if (!peer_has_vnet_hdr(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_ECN);

        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_CSUM);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO4);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_TSO6);
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_ECN);
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (!get_vhost_net(nc->peer)) {
        return features;
    }
    return vhost_net_get_features(get_vhost_net(nc->peer), features);
}

G
Gerd Hoffmann 已提交
578
/*
 * Return a fixed feature set: MAC, CSUM and the HOST_TSO4/TSO6/ECN offloads.
 * 'vdev' is not consulted.
 */
static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    uint64_t features = 0;

    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    virtio_add_feature(&features, VIRTIO_NET_F_MAC);
    virtio_add_feature(&features, VIRTIO_NET_F_CSUM);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO4);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_TSO6);
    virtio_add_feature(&features, VIRTIO_NET_F_HOST_ECN);

    return features;
}

593 594
/*
 * Push the currently selected guest offloads (n->curr_guest_offloads) to the
 * peer of the first queue.  Each flag passed to qemu_set_offload() is
 * normalised to 0 or 1.
 */
static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    qemu_set_offload(qemu_get_queue(n->nic)->peer,
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
            !!(n->curr_guest_offloads & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
}

/*
 * Extract the guest offload bits (GUEST_CSUM, GUEST_TSO4, GUEST_TSO6,
 * GUEST_ECN, GUEST_UFO) from a feature word.
 *
 * The parameter is 64 bits wide, like the feature words callers hold (e.g.
 * virtio_net_set_features()), so nothing is narrowed at the call and the
 * mask is applied to the full word.
 */
static uint64_t virtio_net_guest_offloads_by_features(uint64_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
        (1ULL << VIRTIO_NET_F_GUEST_UFO);

    return guest_offloads_mask & features;
}

/*
 * Return the guest offload bits contained in the device's guest_features.
 */
static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
{
    return virtio_net_guest_offloads_by_features(
               VIRTIO_DEVICE(n)->guest_features);
}

G
Gerd Hoffmann 已提交
621
/*
 * Apply the feature set acknowledged by the guest.
 *
 * Updates multiqueue mode, the RX header layout and (when the peer has vnet
 * headers) the guest offloads, forwards the features to every vhost backend
 * and initialises the VLAN filter table.
 */
static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1));

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (i = 0;  i < n->max_queues; i++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, i);

        if (!get_vhost_net(nc->peer)) {
            continue;
        }
        vhost_net_ack_features(get_vhost_net(nc->peer), features);
    }

    /*
     * With CTRL_VLAN the table starts out empty; without it every VLAN bit
     * is set.
     */
    if (virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN)) {
        memset(n->vlans, 0, MAX_VLAN >> 3);
    } else {
        memset(n->vlans, 0xff, MAX_VLAN >> 3);
    }
}

657
/*
 * Handle a VIRTIO_NET_CTRL_RX class command.
 *
 * The payload is a single byte that is stored into the receive mode flag
 * selected by 'cmd'.  Returns VIRTIO_NET_ERR for a short payload or an
 * unknown command, VIRTIO_NET_OK otherwise (after sending an rx-filter
 * notification).
 */
static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    uint8_t on;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on));
    if (s != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    switch (cmd) {
    case VIRTIO_NET_CTRL_RX_PROMISC:
        n->promisc = on;
        break;
    case VIRTIO_NET_CTRL_RX_ALLMULTI:
        n->allmulti = on;
        break;
    case VIRTIO_NET_CTRL_RX_ALLUNI:
        n->alluni = on;
        break;
    case VIRTIO_NET_CTRL_RX_NOMULTI:
        n->nomulti = on;
        break;
    case VIRTIO_NET_CTRL_RX_NOUNI:
        n->nouni = on;
        break;
    case VIRTIO_NET_CTRL_RX_NOBCAST:
        n->nobcast = on;
        break;
    default:
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

690 691 692 693 694 695 696
/*
 * Handle a VIRTIO_NET_CTRL_GUEST_OFFLOADS class command.
 *
 * Only VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET is accepted, and only when
 * VIRTIO_NET_F_CTRL_GUEST_OFFLOADS was negotiated, the peer has vnet
 * headers, and the requested bits are a subset of the supported guest
 * offloads.  Returns VIRTIO_NET_OK on success, VIRTIO_NET_ERR otherwise.
 */
static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;
    size_t s;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    /*
     * NOTE(review): the value is used in the byte order it arrives in; no
     * virtio_ld*_p() conversion is applied here - confirm this is intended.
     */
    s = iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads));
    if (s != sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        uint64_t supported_offloads;

        if (!n->has_vnet_hdr) {
            return VIRTIO_NET_ERR;
        }

        supported_offloads = virtio_net_supported_guest_offloads(n);
        if (offloads & ~supported_offloads) {
            return VIRTIO_NET_ERR;
        }

        n->curr_guest_offloads = offloads;
        virtio_net_apply_guest_offloads(n);

        return VIRTIO_NET_OK;
    } else {
        return VIRTIO_NET_ERR;
    }
}

727
/*
 * Handle a VIRTIO_NET_CTRL_MAC class command.
 *
 * VIRTIO_NET_CTRL_MAC_ADDR_SET: the payload must be exactly one MAC address,
 * which becomes the device MAC.
 *
 * VIRTIO_NET_CTRL_MAC_TABLE_SET: the payload is two tables back to back,
 * each a 32-bit entry count followed by that many MAC addresses.  The first
 * table fills the unicast part of the filter table, the second the multicast
 * part.  A table that does not fit in the space left sets the corresponding
 * overflow flag instead of being stored.  The device state is only updated
 * once the whole payload has been validated.
 *
 * Returns VIRTIO_NET_OK on success and VIRTIO_NET_ERR for an unknown command
 * or a malformed payload.
 */
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);
    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs;
    uint64_t table_bytes;

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    /* Staging buffer: n->mac_table is only touched once parsing succeeded. */
    macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    /* --- unicast table --- */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    /* Check the length first so a short read is never byte-swapped. */
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    iov_discard_front(&iov, &iov_cnt, s);

    /* 64-bit arithmetic: entries * ETH_ALEN can exceed 32 bits. */
    table_bytes = (uint64_t)mac_data.entries * ETH_ALEN;
    if (table_bytes > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs, (size_t)table_bytes);
        if (s != table_bytes) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    /* Skip the unicast addresses whether or not they were stored. */
    iov_discard_front(&iov, &iov_cnt, (size_t)table_bytes);

    first_multi = in_use;

    /* --- multicast table --- */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);

    iov_discard_front(&iov, &iov_cnt, s);

    /* The multicast table must account for all remaining payload bytes. */
    table_bytes = (uint64_t)mac_data.entries * ETH_ALEN;
    if (table_bytes != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       (size_t)table_bytes);
        if (s != table_bytes) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}

823
/*
 * Handle a VIRTIO_NET_CTRL_VLAN class command.
 *
 * The payload is a 16-bit VLAN id; VIRTIO_NET_CTRL_VLAN_ADD sets and
 * VIRTIO_NET_CTRL_VLAN_DEL clears the matching bit in n->vlans.  Returns
 * VIRTIO_NET_ERR for a short payload, an id of MAX_VLAN or more, or an
 * unknown command; VIRTIO_NET_OK otherwise (after sending an rx-filter
 * notification).
 */
static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    /* Check the length first so a short read is never byte-swapped. */
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }
    vid = virtio_lduw_p(vdev, &vid);

    if (vid >= MAX_VLAN) {
        return VIRTIO_NET_ERR;
    }

    /* One bit per VLAN id, 32 ids per table word. */
    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) {
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    } else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) {
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

J
Jason Wang 已提交
852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868
/*
 * Handle a VIRTIO_NET_CTRL_ANNOUNCE class command.
 *
 * An ANNOUNCE_ACK is only valid while the ANNOUNCE status bit is set.  It
 * clears the bit and, if announce rounds remain, re-arms the announce timer.
 * Anything else yields VIRTIO_NET_ERR.  The payload (iov) is not read.
 */
static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    if (cmd != VIRTIO_NET_CTRL_ANNOUNCE_ACK ||
        !(n->status & VIRTIO_NET_S_ANNOUNCE)) {
        return VIRTIO_NET_ERR;
    }

    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    if (n->announce_counter) {
        timer_mod(n->announce_timer,
                  qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
                  self_announce_delay(n->announce_counter));
    }

    return VIRTIO_NET_OK;
}

J
Jason Wang 已提交
869
/*
 * Handle a VIRTIO_NET_CTRL_MQ class command.
 *
 * Only VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET is accepted.  The requested number of
 * queue pairs must lie within the VQ_PAIRS_MIN..VQ_PAIRS_MAX range, must not
 * exceed n->max_queues, and multiqueue must be enabled.  On success the
 * current queue count is updated and the status and backend queues are
 * re-synchronised.  Returns VIRTIO_NET_OK or VIRTIO_NET_ERR.
 */
static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mq mq;
    size_t s;
    uint16_t queues;

    s = iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq));
    if (s != sizeof(mq)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        return VIRTIO_NET_ERR;
    }

    queues = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    if (queues < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        queues > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        queues > n->max_queues ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queues = queues;
    /* stop the backend before changing the number of queues to avoid handling a
     * disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queues(n);

    return VIRTIO_NET_OK;
}
903

904 905
/*
 * Control virtqueue handler: pops every pending request from vq,
 * dispatches it on ctrl.class to the matching virtio_net_handle_*()
 * helper and writes the resulting one-byte ack back to the guest.
 *
 * A request whose in/out buffers are too small for the ack / the control
 * header marks the device broken via virtio_error() and stops processing.
 * A request with an unrecognised class is answered with VIRTIO_NET_ERR.
 */
static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
    VirtQueueElement *elem;
    size_t s;
    struct iovec *iov, *iov2;
    unsigned int iov_cnt;

    for (;;) {
        elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }
        if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
            iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
            virtio_error(vdev, "virtio-net ctrl missing headers");
            virtqueue_detach_element(vq, elem, 0);
            g_free(elem);
            break;
        }

        /*
         * Start every request from the error state.  Without this an
         * unknown ctrl.class would fall through the dispatch chain below
         * and report whatever the previous request returned.
         */
        status = VIRTIO_NET_ERR;

        /*
         * Work on a copy of the out iovec: iov_discard_front() modifies
         * the array it is given.  iov2 keeps the original pointer so the
         * copy can be freed.
         */
        iov_cnt = elem->out_num;
        iov2 = iov = g_memdup(elem->out_sg, sizeof(struct iovec) * elem->out_num);
        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
        if (s != sizeof(ctrl)) {
            status = VIRTIO_NET_ERR;
        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
            status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
            status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
        }

        s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
        assert(s == sizeof(status));

        virtqueue_push(vq, elem, sizeof(status));
        virtio_notify(vdev, vq);
        g_free(iov2);
        g_free(elem);
    }
}

A
aliguori 已提交
957 958 959 960
/* RX */

/*
 * RX virtqueue notification handler: flushes packets that were queued
 * for the net client subqueue corresponding to vq.
 */
static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int queue_index = vq2q(virtio_get_queue_index(vq));

    qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index));
}

967
/*
 * NetClientInfo.can_receive callback.
 *
 * Returns 1 when the subqueue behind nc may accept a packet, 0 when the
 * VM is not running, the subqueue index is not below n->curr_queues, the
 * RX virtqueue is not ready, or the driver has not set DRIVER_OK.
 */
static int virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running) {
        return 0;
    }

    if (nc->queue_index >= n->curr_queues) {
        return 0;
    }

    if (!virtio_queue_ready(q->rx_vq) ||
        !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return 0;
    }

    return 1;
}

989
/*
 * Checks whether the RX virtqueue of q can take a packet of bufsize bytes.
 *
 * The queue must be non-empty and, when mergeable RX buffers are in use,
 * must expose at least bufsize bytes.  If not, guest notifications are
 * enabled and 0 is returned; otherwise notifications are disabled and 1
 * is returned.
 */
static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    VirtIONet *n = q->n;
    if (virtio_queue_empty(q->rx_vq) ||
        (n->mergeable_rx_bufs &&
         !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_queue_empty(q->rx_vq) ||
            (n->mergeable_rx_bufs &&
             !virtqueue_avail_bytes(q->rx_vq, bufsize, 0))) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}

1012
/*
 * Byte-swaps, in place, the four 16-bit fields of a virtio-net header
 * (hdr_len, gso_size, csum_start, csum_offset) using virtio_tswap16s()
 * for vdev.
 */
static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
}

A
Anthony Liguori 已提交
1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034
/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
M
Michael S. Tsirkin 已提交
1035
                                        uint8_t *buf, size_t size)
A
Anthony Liguori 已提交
1036 1037 1038 1039 1040 1041
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
M
Michael S. Tsirkin 已提交
1042
        net_checksum_calculate(buf, size);
A
Anthony Liguori 已提交
1043 1044 1045 1046
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}

1047 1048
/*
 * Writes the virtio-net header for an incoming packet into the guest
 * buffers described by iov/iov_cnt.
 *
 * When the backend supplies a vnet header (n->has_vnet_hdr), the header
 * at the start of buf is patched up (dhclient checksum workaround,
 * optional byte swap) and copied to the guest.  Otherwise a header with
 * no flags and VIRTIO_NET_HDR_GSO_NONE is written.
 */
static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);

        if (n->needs_vnet_hdr_swap) {
            virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        }
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}

1069 1070 1071
/*
 * Applies the device's RX filters to an incoming frame.
 *
 * buf points at the host header; the Ethernet frame starts
 * n->host_hdr_len bytes in.  Returns 1 if the frame should be delivered
 * to the guest and 0 if it should be dropped.
 *
 * Checks, in order: promiscuous mode, the VLAN filter bitmap (for frames
 * with ethertype 0x8100), then either the broadcast/multicast rules or
 * the unicast rules including the MAC filter table.
 *
 * NOTE(review): size is not consulted; the frame is assumed to be long
 * enough for the header bytes read here -- confirm against callers.
 */
static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    uint8_t *ptr = (uint8_t *)buf;
    int i;

    if (n->promisc) {
        return 1;
    }

    ptr += n->host_hdr_len;

    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        /* low 12 bits of the tag select one bit in the 32-bit-word bitmap */
        int vid = lduw_be_p(ptr + 14) & 0xfff;
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f)))) {
            return 0;
        }
    }

    if (ptr[0] & 1) { // multicast
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        /* multicast entries occupy [first_multi, in_use) of the table */
        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { // unicast
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        /* unicast entries occupy [0, first_multi) of the table */
        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}

1120
/*
 * NetClientInfo.receive callback: copies one incoming packet into the
 * guest's RX virtqueue.
 *
 * Returns:
 *   -1    if the queue cannot receive right now, or the virtqueue turned
 *         out to be empty / malformed while copying;
 *    0    if there are not enough guest buffers for the packet;
 *   size  if the packet was delivered, filtered out, or dropped because
 *         it did not fit in a single buffer without mergeable RX buffers.
 */
static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size)) {
        return size;
    }

    /* offset: bytes of buf consumed so far; i: guest elements filled */
    offset = i = 0;

    while (offset < size) {
        VirtQueueElement *elem;
        int len, total;
        const struct iovec *sg;

        total = 0;

        elem = virtqueue_pop(q->rx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            if (i) {
                virtio_error(vdev, "virtio-net unexpected empty queue: "
                             "i %zu mergeable %d offset %zu, size %zu, "
                             "guest hdr len %zd, host hdr len %zd "
                             "guest features 0x%" PRIx64,
                             i, n->mergeable_rx_bufs, offset, size,
                             n->guest_hdr_len, n->host_hdr_len,
                             vdev->guest_features);
            }
            return -1;
        }

        if (elem->in_num < 1) {
            virtio_error(vdev,
                         "virtio-net receive queue contains no in buffers");
            virtqueue_detach_element(q->rx_vq, elem, 0);
            g_free(elem);
            return -1;
        }

        sg = elem->in_sg;
        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                /*
                 * Remember where num_buffers lives in the first element
                 * so it can be written once the final count is known.
                 */
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem->in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem->in_num, buf, size);
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem->in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_unpop(q->rx_vq, elem, total);
            g_free(elem);
            return size;
        }

        /* signal other side */
        virtqueue_fill(q->rx_vq, elem, total, i++);
        g_free(elem);
    }

    if (mhdr_cnt) {
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;
}

1223
static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
1224

1225
/*
 * Completion callback passed to qemu_sendv_packet_async().
 *
 * Returns the element that was parked in q->async_tx.elem to the guest,
 * frees it, re-enables TX notifications and resumes flushing the queue.
 * len is not used.
 */
static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    virtqueue_push(q->tx_vq, q->async_tx.elem, 0);
    virtio_notify(vdev, q->tx_vq);

    g_free(q->async_tx.elem);
    q->async_tx.elem = NULL;

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}

A
aliguori 已提交
1241
/* TX */
1242
/*
 * Sends packets queued on the TX virtqueue of q to the backend.
 *
 * Returns:
 *   the number of packets handled (at most n->tx_burst; 0 if the driver
 *     is not ready or an asynchronous send is still outstanding);
 *   -EBUSY  if the backend queued a packet asynchronously; the element
 *     is kept in q->async_tx.elem and notifications are disabled;
 *   -EINVAL if a malformed element was found and the device was marked
 *     broken via virtio_error().
 */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement *elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    if (q->async_tx.elem) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }

    for (;;) {
        ssize_t ret;
        unsigned int out_num;
        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1], *out_sg;
        struct virtio_net_hdr_mrg_rxbuf mhdr;

        elem = virtqueue_pop(q->tx_vq, sizeof(VirtQueueElement));
        if (!elem) {
            break;
        }

        out_num = elem->out_num;
        out_sg = elem->out_sg;
        if (out_num < 1) {
            virtio_error(vdev, "virtio-net header not in first element");
            virtqueue_detach_element(q->tx_vq, elem, 0);
            g_free(elem);
            return -EINVAL;
        }

        if (n->has_vnet_hdr) {
            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
                n->guest_hdr_len) {
                virtio_error(vdev, "virtio-net header incorrect");
                virtqueue_detach_element(q->tx_vq, elem, 0);
                g_free(elem);
                return -EINVAL;
            }
            if (n->needs_vnet_hdr_swap) {
                /*
                 * Swap a private copy of the header and send it as the
                 * first iovec entry, followed by the rest of the guest
                 * data (everything after the guest header).
                 */
                virtio_net_hdr_swap(vdev, (void *) &mhdr);
                sg2[0].iov_base = &mhdr;
                sg2[0].iov_len = n->guest_hdr_len;
                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
                                   out_sg, out_num,
                                   n->guest_hdr_len, -1);
                if (out_num == VIRTQUEUE_MAX_SIZE) {
                    goto drop;
                }
                out_num += 1;
                out_sg = sg2;
            }
        }
        /*
         * If host wants to see the guest header as is, we can
         * pass it on unchanged. Otherwise, copy just the parts
         * that host is interested in.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                                       out_sg, out_num,
                                       0, n->host_hdr_len);
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                             out_sg, out_num,
                             n->guest_hdr_len, -1);
            out_num = sg_num;
            out_sg = sg;
        }

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            /* elem is handed back to the guest by virtio_net_tx_complete() */
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            return -EBUSY;
        }

drop:
        virtqueue_push(q->tx_vq, elem, 0);
        virtio_notify(vdev, q->tx_vq);
        g_free(elem);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;
}

1337
/*
 * TX virtqueue notification handler used when the queue was created in
 * timer mode.
 *
 * With the link down the queued TX data is dropped.  With the VM stopped
 * only tx_waiting is recorded.  Otherwise: if a flush is already pending
 * (tx_waiting), the timer is cancelled and the queue flushed immediately;
 * if not, the TX timer is armed n->tx_timeout ns ahead and guest
 * notifications are disabled until it fires.
 */
static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        q->tx_waiting = 1;
        return;
    }

    if (q->tx_waiting) {
        virtio_queue_set_notification(vq, 1);
        timer_del(q->tx_timer);
        q->tx_waiting = 0;
        if (virtio_net_flush_tx(q) == -EINVAL) {
            return;
        }
    } else {
        timer_mod(q->tx_timer,
                       qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        q->tx_waiting = 1;
        virtio_queue_set_notification(vq, 0);
    }
}

1368 1369
/*
 * TX virtqueue notification handler used when the queue was created in
 * bottom-half mode.
 *
 * With the link down the queued TX data is dropped.  Otherwise, unless a
 * flush is already pending, tx_waiting is set and -- if the VM is
 * running -- guest notifications are disabled and the TX bottom half is
 * scheduled.
 */
static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely((n->status & VIRTIO_NET_S_LINK_UP) == 0)) {
        virtio_net_drop_tx_queue_data(vdev, vq);
        return;
    }

    if (unlikely(q->tx_waiting)) {
        return;
    }
    q->tx_waiting = 1;
    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        return;
    }
    virtio_queue_set_notification(vq, 0);
    qemu_bh_schedule(q->tx_bh);
}

A
aliguori 已提交
1390 1391
static void virtio_net_tx_timer(void *opaque)
{
1392 1393
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
1394
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1395 1396 1397 1398 1399 1400
    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }
A
aliguori 已提交
1401

1402
    q->tx_waiting = 0;
A
aliguori 已提交
1403 1404

    /* Just in case the driver is not ready on more */
1405
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
A
aliguori 已提交
1406
        return;
1407
    }
A
aliguori 已提交
1408

1409 1410
    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
A
aliguori 已提交
1411 1412
}

1413 1414
static void virtio_net_tx_bh(void *opaque)
{
1415 1416
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
1417
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1418 1419
    int32_t ret;

1420 1421 1422 1423 1424 1425
    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }
1426

1427
    q->tx_waiting = 0;
1428 1429

    /* Just in case the driver is not ready on more */
1430
    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
1431
        return;
1432
    }
1433

1434
    ret = virtio_net_flush_tx(q);
1435 1436 1437
    if (ret == -EBUSY || ret == -EINVAL) {
        return; /* Notification re-enable handled by tx_complete or device
                 * broken */
1438 1439 1440 1441 1442
    }

    /* If we flush a full burst of packets, assume there are
     * more coming and immediately reschedule */
    if (ret >= n->tx_burst) {
1443 1444
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
1445 1446 1447 1448 1449 1450
        return;
    }

    /* If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking.  If
     * we find something, assume the guest is still active and reschedule */
1451
    virtio_queue_set_notification(q->tx_vq, 1);
1452 1453 1454 1455
    ret = virtio_net_flush_tx(q);
    if (ret == -EINVAL) {
        return;
    } else if (ret > 0) {
1456 1457 1458
        virtio_queue_set_notification(q->tx_vq, 0);
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
1459 1460 1461
    }
}

1462 1463 1464 1465
/*
 * Creates the RX/TX virtqueue pair for queue pair number index.
 *
 * The RX queue uses the configured rx_queue_size; the TX queue has 256
 * entries.  TX is driven by a timer when the "tx" property is "timer",
 * and by a bottom half otherwise.
 */
static void virtio_net_add_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    n->vqs[index].rx_vq = virtio_add_queue(vdev, n->net_conf.rx_queue_size,
                                           virtio_net_handle_rx);
    if (n->net_conf.tx && !strcmp(n->net_conf.tx, "timer")) {
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, 256, virtio_net_handle_tx_timer);
        n->vqs[index].tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                              virtio_net_tx_timer,
                                              &n->vqs[index]);
    } else {
        n->vqs[index].tx_vq =
            virtio_add_queue(vdev, 256, virtio_net_handle_tx_bh);
        n->vqs[index].tx_bh = qemu_bh_new(virtio_net_tx_bh, &n->vqs[index]);
    }

    n->vqs[index].tx_waiting = 0;
    n->vqs[index].n = n;
}

/*
 * Tears down queue pair number index: purges packets still queued for
 * its net client, deletes the RX virtqueue (2 * index), releases the TX
 * timer or bottom half, then deletes the TX virtqueue (2 * index + 1).
 */
static void virtio_net_del_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *pair = &n->vqs[index];
    int rx_idx = index * 2;
    int tx_idx = index * 2 + 1;

    qemu_purge_queued_packets(qemu_get_subqueue(n->nic, index));

    virtio_del_queue(vdev, rx_idx);

    /* a queue has either a TX timer or a TX bottom half */
    if (!pair->tx_timer) {
        qemu_bh_delete(pair->tx_bh);
    } else {
        timer_del(pair->tx_timer);
        timer_free(pair->tx_timer);
    }

    virtio_del_queue(vdev, tx_idx);
}

/*
 * Resizes the device to new_max_queues RX/TX queue pairs plus one control
 * virtqueue.  Virtqueues are laid out as rx0, tx0, rx1, tx1, ..., ctrl,
 * so the total is always odd and at least 3.
 */
static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int old_total = virtio_get_num_queues(vdev);
    int new_total = new_max_queues * 2 + 1;
    int vq_idx;

    assert(old_total >= 3);
    assert(old_total % 2 == 1);

    if (new_total == old_total) {
        return;
    }

    /*
     * The ctrl vq always sits last, so whenever the total changes it has
     * to be removed first and re-added at the end.  After removing it,
     * at most one of the two loops below has any work to do.
     */
    virtio_del_queue(vdev, old_total - 1);

    /* shrinking: drop the surplus queue pairs */
    vq_idx = new_total - 1;
    while (vq_idx < old_total - 1) {
        virtio_net_del_queue(n, vq_idx / 2);
        vq_idx += 2;
    }

    /* growing: create the missing queue pairs */
    vq_idx = old_total - 1;
    while (vq_idx < new_total - 1) {
        virtio_net_add_queue(n, vq_idx / 2);
        vq_idx += 2;
    }

    /* ctrl vq goes back in as the last virtqueue */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
}

J
Jason Wang 已提交
1537
/*
 * Enables or disables multiqueue: records the flag, resizes the device
 * to n->max_queues queue pairs (or a single pair when disabled) and
 * re-applies the queue configuration.
 */
static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
{
    int max = multiqueue ? n->max_queues : 1;

    n->multiqueue = multiqueue;
    virtio_net_change_num_queues(n, max);

    virtio_net_set_queues(n);
}

1547 1548 1549 1550
/*
 * Writes the virtio-net specific device state to the migration stream f.
 *
 * The field order and widths here define the stream format and must
 * match virtio_net_load_device() exactly.
 */
static void virtio_net_save_device(VirtIODevice *vdev, QEMUFile *f)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    qemu_put_buffer(f, n->mac, ETH_ALEN);
    qemu_put_be32(f, n->vqs[0].tx_waiting);
    qemu_put_be32(f, n->mergeable_rx_bufs);
    qemu_put_be16(f, n->status);
    qemu_put_byte(f, n->promisc);
    qemu_put_byte(f, n->allmulti);
    qemu_put_be32(f, n->mac_table.in_use);
    qemu_put_buffer(f, n->mac_table.macs, n->mac_table.in_use * ETH_ALEN);
    /* VLAN filter: one bit per VLAN id */
    qemu_put_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);
    qemu_put_be32(f, n->has_vnet_hdr);
    qemu_put_byte(f, n->mac_table.multi_overflow);
    qemu_put_byte(f, n->mac_table.uni_overflow);
    qemu_put_byte(f, n->alluni);
    qemu_put_byte(f, n->nomulti);
    qemu_put_byte(f, n->nouni);
    qemu_put_byte(f, n->nobcast);
    qemu_put_byte(f, n->has_ufo);
    /* per-queue state beyond queue 0 is only present for multiqueue devices */
    if (n->max_queues > 1) {
        qemu_put_be16(f, n->max_queues);
        qemu_put_be16(f, n->curr_queues);
        for (i = 1; i < n->curr_queues; i++) {
            qemu_put_be32(f, n->vqs[i].tx_waiting);
        }
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        qemu_put_be64(f, n->curr_guest_offloads);
    }
}

1582 1583 1584 1585 1586
/*
 * Restores the virtio-net specific device state from the migration
 * stream f, in the order written by virtio_net_save_device().
 *
 * Returns 0 on success and -1 if the saved state needs a backend feature
 * the current peer lacks (vnet_hdr, UFO) or the queue counts do not fit
 * this device.  version_id is not consulted here.
 */
static int virtio_net_load_device(VirtIODevice *vdev, QEMUFile *f,
                                  int version_id)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i, link_down;

    qemu_get_buffer(f, n->mac, ETH_ALEN);
    n->vqs[0].tx_waiting = qemu_get_be32(f);

    virtio_net_set_mrg_rx_bufs(n, qemu_get_be32(f),
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_F_VERSION_1));

    n->status = qemu_get_be16(f);

    n->promisc = qemu_get_byte(f);
    n->allmulti = qemu_get_byte(f);

    n->mac_table.in_use = qemu_get_be32(f);
    /* MAC_TABLE_ENTRIES may be different from the saved image */
    if (n->mac_table.in_use <= MAC_TABLE_ENTRIES) {
        qemu_get_buffer(f, n->mac_table.macs,
                        n->mac_table.in_use * ETH_ALEN);
    } else {
        int64_t skipped;

        /* Overflow detected - can happen if source has a larger MAC table.
         * We simply set overflow flag so there's no need to maintain the
         * table of addresses, discard them all.
         * Note: 64 bit math to avoid integer overflow.
         */
        for (skipped = 0;
             skipped < (int64_t)n->mac_table.in_use * ETH_ALEN; ++skipped) {
            qemu_get_byte(f);
        }
        n->mac_table.multi_overflow = n->mac_table.uni_overflow = 1;
        n->mac_table.in_use = 0;
    }

    qemu_get_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);

    if (qemu_get_be32(f) && !peer_has_vnet_hdr(n)) {
        error_report("virtio-net: saved image requires vnet_hdr=on");
        return -1;
    }

    n->mac_table.multi_overflow = qemu_get_byte(f);
    n->mac_table.uni_overflow = qemu_get_byte(f);

    n->alluni = qemu_get_byte(f);
    n->nomulti = qemu_get_byte(f);
    n->nouni = qemu_get_byte(f);
    n->nobcast = qemu_get_byte(f);

    if (qemu_get_byte(f) && !peer_has_ufo(n)) {
        error_report("virtio-net: saved image requires TUN_F_UFO support");
        return -1;
    }

    if (n->max_queues > 1) {
        if (n->max_queues != qemu_get_be16(f)) {
            error_report("virtio-net: different max_queues ");
            return -1;
        }

        n->curr_queues = qemu_get_be16(f);
        if (n->curr_queues > n->max_queues) {
            error_report("virtio-net: curr_queues %x > max_queues %x",
                         n->curr_queues, n->max_queues);
            return -1;
        }
        for (i = 1; i < n->curr_queues; i++) {
            n->vqs[i].tx_waiting = qemu_get_be32(f);
        }
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        n->curr_guest_offloads = qemu_get_be64(f);
    } else {
        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
    }

    if (peer_has_vnet_hdr(n)) {
        virtio_net_apply_guest_offloads(n);
    }

    virtio_net_set_queues(n);

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in n->status */
    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
    for (i = 0; i < n->max_queues; i++) {
        qemu_get_subqueue(n->nic, i)->link_down = link_down;
    }

    /* kick off self-announcement when the guest negotiated support for it */
    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        n->announce_counter = SELF_ANNOUNCE_ROUNDS;
        timer_mod(n->announce_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL));
    }

    return 0;
}

M
Mark McLoughlin 已提交
1693
static NetClientInfo net_virtio_info = {
1694
    .type = NET_CLIENT_DRIVER_NIC,
M
Mark McLoughlin 已提交
1695 1696 1697 1698
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
1699
    .query_rx_filter = virtio_net_query_rxfilter,
M
Mark McLoughlin 已提交
1700 1701
};

1702 1703
/*
 * Asks the vhost backend whether virtqueue idx has a pending guest
 * notification.  Asserts that vhost has been started.
 */
static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
    assert(n->vhost_started);
    return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
}

/*
 * Forwards a mask/unmask request for the guest notifier of virtqueue idx
 * to the vhost backend.  Asserts that vhost has been started.
 */
static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
                                           bool mask)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
    assert(n->vhost_started);
    vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
                             vdev, idx, mask);
}

G
Gerd Hoffmann 已提交
1720
/*
 * Computes n->config_size: the largest "end" offset in feature_sizes[]
 * among the features present in host_features.  VIRTIO_NET_F_MAC is
 * always treated as present, so the config space covers the mac field.
 */
static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
{
    int i, config_size = 0;
    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
    /* feature_sizes[] is terminated by an entry with flags == 0 */
    for (i = 0; feature_sizes[i].flags != 0; i++) {
        if (host_features & feature_sizes[i].flags) {
            config_size = MAX(feature_sizes[i].end, config_size);
        }
    }
    n->config_size = config_size;
}

1732 1733 1734 1735 1736 1737 1738 1739
void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
                                   const char *type)
{
    /*
     * The name can be NULL, the netclient name will be type.x.
     */
    assert(type != NULL);

1740 1741
    g_free(n->netclient_name);
    g_free(n->netclient_type);
1742
    n->netclient_name = g_strdup(name);
1743 1744 1745
    n->netclient_type = g_strdup(type);
}

1746
static void virtio_net_device_realize(DeviceState *dev, Error **errp)
1747
{
1748
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
1749
    VirtIONet *n = VIRTIO_NET(dev);
1750
    NetClientState *nc;
1751
    int i;
1752

1753
    virtio_net_set_config_size(n, n->host_features);
1754
    virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);
A
aliguori 已提交
1755

1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771
    /*
     * We set a lower limit on RX queue size to what it always was.
     * Guests that want a smaller ring can always resize it without
     * help from us (using virtio 1 and up).
     */
    if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
        n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
        (n->net_conf.rx_queue_size & (n->net_conf.rx_queue_size - 1))) {
        error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
                   "must be a power of 2 between %d and %d.",
                   n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
                   VIRTQUEUE_MAX_SIZE);
        virtio_cleanup(vdev);
        return;
    }

1772
    n->max_queues = MAX(n->nic_conf.peers.queues, 1);
1773
    if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
1774
        error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
S
Stefan Weil 已提交
1775
                   "must be a positive integer less than %d.",
1776
                   n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
1777 1778 1779
        virtio_cleanup(vdev);
        return;
    }
1780
    n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
J
Jason Wang 已提交
1781
    n->curr_queues = 1;
1782
    n->tx_timeout = n->net_conf.txtimer;
1783

1784 1785
    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
                       && strcmp(n->net_conf.tx, "bh")) {
1786 1787
        error_report("virtio-net: "
                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
1788
                     n->net_conf.tx);
1789
        error_report("Defaulting to \"bh\"");
1790 1791
    }

1792
    for (i = 0; i < n->max_queues; i++) {
1793
        virtio_net_add_queue(n, i);
1794
    }
1795

1796
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
1797 1798
    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
1799
    n->status = VIRTIO_NET_S_LINK_UP;
J
Jason Wang 已提交
1800 1801
    n->announce_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
                                     virtio_net_announce_timer, n);
A
aliguori 已提交
1802

1803 1804 1805 1806 1807 1808 1809 1810
    if (n->netclient_type) {
        /*
         * Happen when virtio_net_set_netclient_name has been called.
         */
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              n->netclient_type, n->netclient_name, n);
    } else {
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
1811
                              object_get_typename(OBJECT(dev)), dev->id, n);
1812 1813
    }

1814 1815
    peer_test_vnet_hdr(n);
    if (peer_has_vnet_hdr(n)) {
J
Jason Wang 已提交
1816
        for (i = 0; i < n->max_queues; i++) {
1817
            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
J
Jason Wang 已提交
1818
        }
1819 1820 1821 1822
        n->host_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        n->host_hdr_len = 0;
    }
M
Mark McLoughlin 已提交
1823

1824
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);
1825

J
Jason Wang 已提交
1826
    n->vqs[0].tx_waiting = 0;
1827
    n->tx_burst = n->net_conf.txburst;
1828
    virtio_net_set_mrg_rx_bufs(n, 0, 0);
1829
    n->promisc = 1; /* for compatibility */
A
aliguori 已提交
1830

1831
    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);
1832

1833
    n->vlans = g_malloc0(MAX_VLAN >> 3);
1834

1835 1836 1837
    nc = qemu_get_queue(n->nic);
    nc->rxfilter_notify_enabled = 1;

1838
    n->qdev = dev;
1839 1840
}

1841
static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
1842
{
1843 1844
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
1845
    int i, max_queues;
1846 1847 1848 1849

    /* This will stop vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

1850 1851 1852 1853
    g_free(n->netclient_name);
    n->netclient_name = NULL;
    g_free(n->netclient_type);
    n->netclient_type = NULL;
1854

1855 1856 1857
    g_free(n->mac_table.macs);
    g_free(n->vlans);

1858 1859 1860
    max_queues = n->multiqueue ? n->max_queues : 1;
    for (i = 0; i < max_queues; i++) {
        virtio_net_del_queue(n, i);
1861 1862
    }

J
Jason Wang 已提交
1863 1864
    timer_del(n->announce_timer);
    timer_free(n->announce_timer);
1865 1866
    g_free(n->vqs);
    qemu_del_nic(n->nic);
1867
    virtio_cleanup(vdev);
1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878
}

/*
 * Instance initialiser: sets the default config space size and registers
 * the "bootindex" property backed by nic_conf.bootindex.
 */
static void virtio_net_instance_init(Object *obj)
{
    VirtIONet *n = VIRTIO_NET(obj);

    /*
     * The default config_size is sizeof(struct virtio_net_config).
     * Can be overridden with virtio_net_set_config_size.
     */
    n->config_size = sizeof(struct virtio_net_config);
    device_add_bootindex_property(obj, &n->nic_conf.bootindex,
                                  "bootindex", "/ethernet-phy@0",
                                  DEVICE(n), NULL);
}

1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902
static void virtio_net_pre_save(void *opaque)
{
    VirtIONet *n = opaque;

    /* At this point, backend must be stopped, otherwise
     * it might keep writing to memory. */
    assert(!n->vhost_started);
}

/*
 * Migration description for virtio-net.  Only VIRTIO_NET_VM_VERSION is
 * accepted (minimum and current version are the same), and
 * virtio_net_pre_save() runs before the state is saved.
 */
static const VMStateDescription vmstate_virtio_net = {
    .name = "virtio-net",
    .minimum_version_id = VIRTIO_NET_VM_VERSION,
    .version_id = VIRTIO_NET_VM_VERSION,
    .fields = (VMStateField[]) {
        VMSTATE_VIRTIO_DEVICE,
        VMSTATE_END_OF_LIST()
    },
    .pre_save = virtio_net_pre_save,
};

1904
static Property virtio_net_properties[] = {
1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943
    DEFINE_PROP_BIT("csum", VirtIONet, host_features, VIRTIO_NET_F_CSUM, true),
    DEFINE_PROP_BIT("guest_csum", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_CSUM, true),
    DEFINE_PROP_BIT("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
    DEFINE_PROP_BIT("guest_tso4", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_TSO4, true),
    DEFINE_PROP_BIT("guest_tso6", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_TSO6, true),
    DEFINE_PROP_BIT("guest_ecn", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_ECN, true),
    DEFINE_PROP_BIT("guest_ufo", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_UFO, true),
    DEFINE_PROP_BIT("guest_announce", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_ANNOUNCE, true),
    DEFINE_PROP_BIT("host_tso4", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_TSO4, true),
    DEFINE_PROP_BIT("host_tso6", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_TSO6, true),
    DEFINE_PROP_BIT("host_ecn", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_ECN, true),
    DEFINE_PROP_BIT("host_ufo", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_UFO, true),
    DEFINE_PROP_BIT("mrg_rxbuf", VirtIONet, host_features,
                    VIRTIO_NET_F_MRG_RXBUF, true),
    DEFINE_PROP_BIT("status", VirtIONet, host_features,
                    VIRTIO_NET_F_STATUS, true),
    DEFINE_PROP_BIT("ctrl_vq", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_VQ, true),
    DEFINE_PROP_BIT("ctrl_rx", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_RX, true),
    DEFINE_PROP_BIT("ctrl_vlan", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_VLAN, true),
    DEFINE_PROP_BIT("ctrl_rx_extra", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_RX_EXTRA, true),
    DEFINE_PROP_BIT("ctrl_mac_addr", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_MAC_ADDR, true),
    DEFINE_PROP_BIT("ctrl_guest_offloads", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
    DEFINE_PROP_BIT("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
1944 1945
    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
1946
                       TX_TIMER_INTERVAL),
1947 1948
    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
1949 1950
    DEFINE_PROP_UINT16("rx_queue_size", VirtIONet, net_conf.rx_queue_size,
                       VIRTIO_NET_RX_QUEUE_DEFAULT_SIZE),
1951 1952 1953 1954 1955 1956 1957
    DEFINE_PROP_END_OF_LIST(),
};

/*
 * Class initialiser: installs the property list, the migration
 * description and the virtio device callbacks for virtio-net, and marks
 * VIRTIO_NET_F_GSO as a legacy feature.
 */
static void virtio_net_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    dc->props = virtio_net_properties;
    dc->vmsd = &vmstate_virtio_net;
    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);
    vdc->realize = virtio_net_device_realize;
    vdc->unrealize = virtio_net_device_unrealize;
    vdc->get_config = virtio_net_get_config;
    vdc->set_config = virtio_net_set_config;
    vdc->get_features = virtio_net_get_features;
    vdc->set_features = virtio_net_set_features;
    vdc->bad_features = virtio_net_bad_features;
    vdc->reset = virtio_net_reset;
    vdc->set_status = virtio_net_set_status;
    vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
    vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;
    vdc->load = virtio_net_load_device;
    vdc->save = virtio_net_save_device;
    vdc->legacy_features |= (0x1 << VIRTIO_NET_F_GSO);
}

/*
 * QOM type description for virtio-net: a TYPE_VIRTIO_DEVICE child whose
 * instances are VirtIONet objects.
 */
static const TypeInfo virtio_net_info = {
    .name          = TYPE_VIRTIO_NET,
    .parent        = TYPE_VIRTIO_DEVICE,
    .class_init    = virtio_net_class_init,
    .instance_init = virtio_net_instance_init,
    .instance_size = sizeof(VirtIONet),
};

/* Register the virtio-net type described by virtio_net_info. */
static void virtio_register_types(void)
{
    type_register_static(&virtio_net_info);
}

/* Hand the registration function to QEMU's type_init() hook. */
type_init(virtio_register_types)