virtio-net.c 56.0 KB
Newer Older
A
aliguori 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

14
#include "qemu/iov.h"
P
Paolo Bonzini 已提交
15
#include "hw/virtio/virtio.h"
P
Paolo Bonzini 已提交
16
#include "net/net.h"
17
#include "net/checksum.h"
18
#include "net/tap.h"
19 20
#include "qemu/error-report.h"
#include "qemu/timer.h"
P
Paolo Bonzini 已提交
21 22
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
23
#include "hw/virtio/virtio-bus.h"
24
#include "qapi/qmp/qjson.h"
25
#include "qapi-event.h"
26
#include "hw/virtio/virtio-access.h"
A
aliguori 已提交
27

28
#define VIRTIO_NET_VM_VERSION    11
29

30
#define MAC_TABLE_ENTRIES    64
31
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
32

33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
/*
 * Calculate the number of bytes up to and including the given 'field' of
 * 'container'.
 */
#define endof(container, field) \
    (offsetof(container, field) + sizeof(((container *)0)->field))

/*
 * Pairs a feature bitmask with a byte count inside struct virtio_net_config.
 * NOTE(review): presumably consumed when sizing the device config space;
 * the consumer is outside this excerpt - confirm.
 */
typedef struct VirtIOFeature {
    uint32_t flags; /* bitmask built from 1 << VIRTIO_NET_F_* */
    size_t end;     /* endof() the config field associated with 'flags' */
} VirtIOFeature;

/*
 * One entry per feature that has a field in struct virtio_net_config.
 * The list is terminated by an all-zero entry.
 */
static VirtIOFeature feature_sizes[] = {
    {.flags = 1 << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1 << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1 << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {}
};

J
Jason Wang 已提交
55
/* Return the per-queue state of the NIC that owns @nc, selected by
 * @nc's queue_index. */
static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *dev = qemu_get_nic_opaque(nc);

    return dev->vqs + nc->queue_index;
}
J
Jason Wang 已提交
61 62 63 64 65 66

/* Map a virtqueue index to its queue index: two virtqueues per queue. */
static int vq2q(int queue_index)
{
    const int vqs_per_queue = 2;

    return queue_index / vqs_per_queue;
}

A
aliguori 已提交
67 68 69 70
/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

71
/*
 * Serialise the device configuration into @config.
 *
 * @vdev:   the virtio-net device
 * @config: destination buffer; n->config_size bytes are written
 */
static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    /*
     * Zero-initialise (as virtio_net_set_config() does) so that any byte
     * not explicitly filled in below can never carry stale stack contents
     * into the guest-visible config space.
     */
    struct virtio_net_config netcfg = {};

    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    memcpy(config, &netcfg, n->config_size);
}

82 83
/*
 * Accept a config-space write.  Only the MAC address is taken from it,
 * and only when neither VIRTIO_NET_F_CTRL_MAC_ADDR nor VIRTIO_F_VERSION_1
 * has been negotiated.
 */
static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config incoming = {};

    memcpy(&incoming, config, n->config_size);

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) ||
        virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        return;
    }

    if (memcmp(incoming.mac, n->mac, ETH_ALEN) == 0) {
        /* MAC unchanged, nothing to update */
        return;
    }

    memcpy(n->mac, incoming.mac, ETH_ALEN);
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
}

97 98
/*
 * True when @status has DRIVER_OK set, the device link is up and the VM
 * is running.
 */
static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return false;
    }
    if (!(n->status & VIRTIO_NET_S_LINK_UP)) {
        return false;
    }
    return vdev->vm_running;
}

J
Jason Wang 已提交
104 105 106 107 108 109 110 111 112 113
/*
 * Announce timer callback: consume one announce round, raise the ANNOUNCE
 * status bit and signal a config change.
 */
static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *net = opaque;

    net->announce_counter--;
    net->status |= VIRTIO_NET_S_ANNOUNCE;
    virtio_notify_config(VIRTIO_DEVICE(net));
}

114
/*
 * Start or stop the vhost backend so that it matches the state implied by
 * @status.  Does nothing when the peer has no vhost backend or when vhost
 * is already in the wanted state.
 */
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queues = n->multiqueue ? n->max_queues : 1;
    bool want_running;
    int ret, idx;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    want_running = virtio_net_started(n, status) && !nc->peer->link_down;
    if (want_running == !!n->vhost_started) {
        return;
    }

    if (n->vhost_started) {
        vhost_net_stop(vdev, n->nic->ncs, queues);
        n->vhost_started = 0;
        return;
    }

    /* Any packets outstanding? Purge them to avoid touching rings
     * when vhost is running.
     */
    for (idx = 0; idx < queues; idx++) {
        NetClientState *qnc = qemu_get_subqueue(n->nic, idx);

        /* Purge both directions: TX and RX. */
        qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
        qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
    }

    n->vhost_started = 1;
    ret = vhost_net_start(vdev, n->nic->ncs, queues);
    if (ret < 0) {
        error_report("unable to start vhost net: %d: "
                     "falling back on userspace virtio", -ret);
        n->vhost_started = 0;
    }
}

155 156
static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
157
    VirtIONet *n = VIRTIO_NET(vdev);
J
Jason Wang 已提交
158 159 160
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;
161 162 163

    virtio_net_vhost_status(n, status);

J
Jason Wang 已提交
164
    for (i = 0; i < n->max_queues; i++) {
165 166
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
J
Jason Wang 已提交
167
        q = &n->vqs[i];
168

J
Jason Wang 已提交
169 170
        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
            queue_status = 0;
171
        } else {
J
Jason Wang 已提交
172
            queue_status = status;
173
        }
174 175 176 177 178 179
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }
J
Jason Wang 已提交
180 181 182 183 184

        if (!q->tx_waiting) {
            continue;
        }

185
        if (queue_started) {
J
Jason Wang 已提交
186
            if (q->tx_timer) {
187 188
                timer_mod(q->tx_timer,
                               qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
J
Jason Wang 已提交
189 190 191
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
192
        } else {
J
Jason Wang 已提交
193
            if (q->tx_timer) {
194
                timer_del(q->tx_timer);
J
Jason Wang 已提交
195 196 197
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
198 199 200 201
        }
    }
}

202
/*
 * Mirror the net client's link state into the LINK_UP status bit, notify
 * the guest when the bit changed, and re-evaluate the device status.
 */
static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    const uint16_t prev = n->status;

    n->status = nc->link_down ? (prev & ~VIRTIO_NET_S_LINK_UP)
                              : (prev | VIRTIO_NET_S_LINK_UP);

    if (n->status != prev) {
        virtio_notify_config(vdev);
    }

    virtio_net_set_status(vdev, vdev->status);
}

219 220 221 222 223
/*
 * Emit a NIC_RX_FILTER_CHANGED event for @nc if notification is enabled,
 * then disable further notification.
 */
static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    gchar *qom_path;

    if (!nc->rxfilter_notify_enabled) {
        return;
    }

    qom_path = object_get_canonical_path(OBJECT(n->qdev));
    qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
                                          n->netclient_name, qom_path,
                                          &error_abort);
    g_free(qom_path);

    /* disable event notification to avoid events flooding */
    nc->rxfilter_notify_enabled = 0;
}

234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253
/*
 * Build a list of every VLAN id whose bit is set in n->vlans.  Entries are
 * prepended, so the list ends up in descending id order.  The returned
 * nodes are allocated with g_malloc0().
 */
static intList *get_vlan_table(VirtIONet *n)
{
    intList *head = NULL;
    int word, bit;

    for (word = 0; word < MAX_VLAN >> 5; word++) {
        if (!n->vlans[word]) {
            /* no id set in this 32-bit word */
            continue;
        }
        for (bit = 0; bit <= 0x1f; bit++) {
            intList *node;

            if (!(n->vlans[word] & (1U << bit))) {
                continue;
            }
            node = g_malloc0(sizeof(*node));
            node->value = (word << 5) + bit;
            node->next = head;
            head = node;
        }
    }

    return head;
}

254 255 256
/*
 * Build an RxFilterInfo snapshot of the current receive filter state of
 * the NIC behind @nc and re-enable rx-filter change events.
 *
 * Returns a newly allocated RxFilterInfo; its name, MAC strings and list
 * nodes are allocated here as well.
 */
static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list, *entry;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    /*
     * n->nobcast is set by VIRTIO_NET_CTRL_RX_NOBCAST, i.e. it means
     * "do not receive broadcast"; the reported field has the opposite
     * polarity, so it must be negated.
     */
    info->broadcast_allowed = !n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    /* Unicast entries occupy [0, first_multi) of the MAC table */
    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->unicast_table = str_list;

    /* Multicast entries occupy [first_multi, in_use) */
    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

321 322
/*
 * Device reset: restore the default RX mode, single-queue operation,
 * announce state, MAC/VLAN filter tables and the configured MAC address.
 */
static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = n->alluni = 0;
    n->nomulti = n->nouni = n->nobcast = 0;

    /* multiqueue is disabled by default */
    n->curr_queues = 1;

    /* Drop any announcement in progress */
    timer_del(n->announce_timer);
    n->announce_counter = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Restore the configured MAC address */
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
}

349
/* Cache in n->has_vnet_hdr whether the peer (if any) has a vnet header. */
static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);

    if (nc->peer) {
        n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
    }
}
M
Mark McLoughlin 已提交
358

359 360
/* Return the cached vnet-header capability stored in n->has_vnet_hdr. */
static int peer_has_vnet_hdr(VirtIONet *n)
{
    return n->has_vnet_hdr;
}

364 365 366 367 368
/*
 * Return whether the peer has UFO.  Without a vnet header the answer is
 * always 0; otherwise the peer is queried and the result is cached in
 * n->has_ufo.
 */
static int peer_has_ufo(VirtIONet *n)
{
    if (peer_has_vnet_hdr(n)) {
        n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
        return n->has_ufo;
    }

    return 0;
}

374 375
/*
 * Record the mergeable-RX-buffer setting, derive the guest header length
 * from it and from @version_1, and for every queue whose peer accepts
 * that header length, set it on the peer and use it as host header length.
 */
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1)
{
    int q;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    /* VERSION_1 always uses the mergeable header layout */
    n->guest_hdr_len = (version_1 || n->mergeable_rx_bufs)
        ? sizeof(struct virtio_net_hdr_mrg_rxbuf)
        : sizeof(struct virtio_net_hdr);

    for (q = 0; q < n->max_queues; q++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, q);

        if (!peer_has_vnet_hdr(n) ||
            !qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            continue;
        }
        qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
        n->host_hdr_len = n->guest_hdr_len;
    }
}

J
Jason Wang 已提交
401 402 403 404 405 406 407 408
/*
 * Enable the backend for queue @index: vhost-user peers get their vring
 * enabled, tap peers are enabled via tap_enable().  Returns 0 for a
 * missing peer or any non-tap peer, otherwise tap_enable()'s result.
 */
static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    switch (nc->peer->info->type) {
    case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
        vhost_set_vring_enable(nc->peer, 1);
        return 0;
    case NET_CLIENT_OPTIONS_KIND_TAP:
        return tap_enable(nc->peer);
    default:
        return 0;
    }
}

/*
 * Disable the backend for queue @index: vhost-user peers get their vring
 * disabled, tap peers are disabled via tap_disable().  Returns 0 for a
 * missing peer or any non-tap peer, otherwise tap_disable()'s result.
 */
static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer) {
        return 0;
    }

    switch (nc->peer->info->type) {
    case NET_CLIENT_OPTIONS_KIND_VHOST_USER:
        vhost_set_vring_enable(nc->peer, 0);
        return 0;
    case NET_CLIENT_OPTIONS_KIND_TAP:
        return tap_disable(nc->peer);
    default:
        return 0;
    }
}

/*
 * Attach the peers of the first n->curr_queues queues and detach the
 * rest.  Any failure trips an assertion.
 */
static void virtio_net_set_queues(VirtIONet *n)
{
    int idx;

    for (idx = 0; idx < n->max_queues; idx++) {
        int rc = (idx < n->curr_queues) ? peer_attach(n, idx)
                                        : peer_detach(n, idx);

        assert(!rc);
    }
}

J
Jason Wang 已提交
455
static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
J
Jason Wang 已提交
456

J
Jason Wang 已提交
457 458
/*
 * Compute the feature set offered to the guest: start from @features plus
 * the device's host_features, always offer MAC, drop offloads the peer
 * cannot provide, and finally let a vhost backend (if any) filter the set.
 * @errp is not used here.
 */
static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    /* Offloads that require the peer to have a vnet header */
    static const unsigned int vnet_hdr_bits[] = {
        VIRTIO_NET_F_CSUM,
        VIRTIO_NET_F_HOST_TSO4,
        VIRTIO_NET_F_HOST_TSO6,
        VIRTIO_NET_F_HOST_ECN,
        VIRTIO_NET_F_GUEST_CSUM,
        VIRTIO_NET_F_GUEST_TSO4,
        VIRTIO_NET_F_GUEST_TSO6,
        VIRTIO_NET_F_GUEST_ECN,
    };
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);
    size_t k;

    /* Firstly sync all virtio-net possible supported features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        for (k = 0; k < sizeof(vnet_hdr_bits) / sizeof(vnet_hdr_bits[0]); k++) {
            virtio_clear_feature(&features, vnet_hdr_bits[k]);
        }
    }

    if (!peer_has_vnet_hdr(n) || !peer_has_ufo(n)) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (get_vhost_net(nc->peer)) {
        return vhost_net_get_features(get_vhost_net(nc->peer), features);
    }
    return features;
}

G
Gerd Hoffmann 已提交
491
/*
 * Return the feature set assumed for the Linux 2.6.25 guest driver
 * described below.  @vdev is not used.
 */
static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    static const unsigned int legacy_bits[] = {
        VIRTIO_NET_F_MAC,
        VIRTIO_NET_F_CSUM,
        VIRTIO_NET_F_HOST_TSO4,
        VIRTIO_NET_F_HOST_TSO6,
        VIRTIO_NET_F_HOST_ECN,
    };
    uint64_t mask = 0;
    size_t k;

    for (k = 0; k < sizeof(legacy_bits) / sizeof(legacy_bits[0]); k++) {
        virtio_add_feature(&mask, legacy_bits[k]);
    }

    return mask;
}

506 507
/*
 * Push the currently selected guest offloads (n->curr_guest_offloads) to
 * the peer as individual csum/tso4/tso6/ecn/ufo flags.
 */
static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    const uint64_t cur = n->curr_guest_offloads;
    int csum = !!(cur & (1ULL << VIRTIO_NET_F_GUEST_CSUM));
    int tso4 = !!(cur & (1ULL << VIRTIO_NET_F_GUEST_TSO4));
    int tso6 = !!(cur & (1ULL << VIRTIO_NET_F_GUEST_TSO6));
    int ecn  = !!(cur & (1ULL << VIRTIO_NET_F_GUEST_ECN));
    int ufo  = !!(cur & (1ULL << VIRTIO_NET_F_GUEST_UFO));

    qemu_set_offload(qemu_get_queue(n->nic)->peer,
                     csum, tso4, tso6, ecn, ufo);
}

/*
 * Extract the guest offload bits (GUEST_CSUM/TSO4/TSO6/ECN/UFO) from a
 * feature word.
 *
 * @features is taken as a full 64-bit feature word, matching the type the
 * callers in this file pass, so no implicit truncation happens at the
 * call site.
 */
static uint64_t virtio_net_guest_offloads_by_features(uint64_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
        (1ULL << VIRTIO_NET_F_GUEST_UFO);

    return guest_offloads_mask & features;
}

/* Guest offload bits present in the features the guest has acknowledged. */
static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
{
    return virtio_net_guest_offloads_by_features(
        VIRTIO_DEVICE(n)->guest_features);
}

G
Gerd Hoffmann 已提交
534
/*
 * Apply the feature set acknowledged by the guest: multiqueue mode,
 * header layout, guest offloads, vhost feature ack for every queue, and
 * the initial VLAN filter contents.
 */
static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int q;

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(
        n,
        virtio_has_feature(features, VIRTIO_NET_F_MRG_RXBUF),
        virtio_has_feature(features, VIRTIO_F_VERSION_1));

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (q = 0; q < n->max_queues; q++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, q);

        if (get_vhost_net(nc->peer)) {
            vhost_net_ack_features(get_vhost_net(nc->peer), features);
        }
    }

    /* With CTRL_VLAN the table starts empty; without it every bit is set */
    memset(n->vlans,
           virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN) ? 0 : 0xff,
           MAX_VLAN >> 3);
}

570
/*
 * Handle a VIRTIO_NET_CTRL_RX class command: read a one-byte on/off value
 * from @iov and store it in the RX mode flag selected by @cmd.
 * Returns VIRTIO_NET_OK, or VIRTIO_NET_ERR on short input or unknown @cmd.
 */
static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    uint8_t on;

    if (iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on)) != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    switch (cmd) {
    case VIRTIO_NET_CTRL_RX_PROMISC:
        n->promisc = on;
        break;
    case VIRTIO_NET_CTRL_RX_ALLMULTI:
        n->allmulti = on;
        break;
    case VIRTIO_NET_CTRL_RX_ALLUNI:
        n->alluni = on;
        break;
    case VIRTIO_NET_CTRL_RX_NOMULTI:
        n->nomulti = on;
        break;
    case VIRTIO_NET_CTRL_RX_NOUNI:
        n->nouni = on;
        break;
    case VIRTIO_NET_CTRL_RX_NOBCAST:
        n->nobcast = on;
        break;
    default:
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

603 604 605 606 607 608 609
/*
 * Handle a VIRTIO_NET_CTRL_GUEST_OFFLOADS class command.  Only
 * VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET is accepted, and only when the
 * feature was negotiated, the peer has a vnet header and every requested
 * offload is within the supported set.
 */
static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    if (iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads)) !=
        sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd != VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        return VIRTIO_NET_ERR;
    }

    if (!n->has_vnet_hdr) {
        return VIRTIO_NET_ERR;
    }

    if (offloads & ~virtio_net_supported_guest_offloads(n)) {
        return VIRTIO_NET_ERR;
    }

    n->curr_guest_offloads = offloads;
    virtio_net_apply_guest_offloads(n);

    return VIRTIO_NET_OK;
}

640
/*
 * Handle a VIRTIO_NET_CTRL_MAC class command.
 *
 * VIRTIO_NET_CTRL_MAC_ADDR_SET: the payload must be exactly one MAC
 * address, which becomes the device MAC.
 *
 * VIRTIO_NET_CTRL_MAC_TABLE_SET: the payload is a unicast list followed by
 * a multicast list, each an entry count followed by that many MAC
 * addresses.  The new table is assembled in a scratch buffer and only
 * committed to n->mac_table once the whole payload has been validated.
 * A list that does not fit is not stored; its overflow flag is set
 * instead.
 *
 * Returns VIRTIO_NET_OK on success, VIRTIO_NET_ERR on an unknown command
 * or malformed payload.
 */
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mac mac_data;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);
    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs;
    uint64_t list_bytes;

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    /* ---- unicast list ---- */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    /* Validate the read before touching the value: on a short read
     * mac_data.entries is (partly) uninitialised. */
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    iov_discard_front(&iov, &iov_cnt, s);

    /* 64-bit product so a huge guest-supplied count cannot wrap around
     * and slip past the length check. */
    list_bytes = (uint64_t)mac_data.entries * ETH_ALEN;
    if (list_bytes > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs, list_bytes);
        if (s != list_bytes) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    iov_discard_front(&iov, &iov_cnt, list_bytes);

    first_multi = in_use;

    /* ---- multicast list ---- */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);

    iov_discard_front(&iov, &iov_cnt, s);

    /* The multicast list must consume the remainder exactly */
    list_bytes = (uint64_t)mac_data.entries * ETH_ALEN;
    if (list_bytes != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       list_bytes);
        if (s != list_bytes) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    /* Everything validated: commit the new table */
    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}

736
/*
 * Handle a VIRTIO_NET_CTRL_VLAN class command: read a 16-bit VLAN id from
 * @iov and set (ADD) or clear (DEL) its bit in n->vlans.
 *
 * Returns VIRTIO_NET_OK on success; VIRTIO_NET_ERR on short input, an id
 * of MAX_VLAN or above, or an unknown @cmd.
 */
static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    /* Check the length first: on a short read 'vid' is (partly)
     * uninitialised and must not be interpreted. */
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }
    vid = virtio_lduw_p(vdev, &vid);

    if (vid >= MAX_VLAN) {
        return VIRTIO_NET_ERR;
    }

    /* One bit per id: 32-bit word vid >> 5, bit vid & 0x1f */
    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) {
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    } else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) {
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

J
Jason Wang 已提交
765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781
/*
 * Handle a VIRTIO_NET_CTRL_ANNOUNCE class command.  An ACK is only valid
 * while the ANNOUNCE status bit is set; it clears the bit and, when
 * announce rounds remain, re-arms the announce timer.
 */
static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    if (cmd != VIRTIO_NET_CTRL_ANNOUNCE_ACK ||
        !(n->status & VIRTIO_NET_S_ANNOUNCE)) {
        return VIRTIO_NET_ERR;
    }

    n->status &= ~VIRTIO_NET_S_ANNOUNCE;
    if (n->announce_counter) {
        timer_mod(n->announce_timer,
                  qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
                  self_announce_delay(n->announce_counter));
    }
    return VIRTIO_NET_OK;
}

J
Jason Wang 已提交
782
/*
 * Handle a VIRTIO_NET_CTRL_MQ class command: validate the requested number
 * of queue pairs and switch the device to it.
 * Returns VIRTIO_NET_OK, or VIRTIO_NET_ERR on short input, an unknown
 * @cmd, an out-of-range count or when multiqueue is not enabled.
 */
static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mq mq;
    uint16_t pairs;

    if (iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq)) != sizeof(mq)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        return VIRTIO_NET_ERR;
    }

    pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    if (pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) {
        return VIRTIO_NET_ERR;
    }
    if (pairs > n->max_queues || !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queues = pairs;
    /* stop the backend before changing the number of queues to avoid handling a
     * disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queues(n);

    return VIRTIO_NET_OK;
}
816 817
/*
 * Control virtqueue handler.  For every request popped from @vq: parse the
 * control header, dispatch on its class to the matching handler, write the
 * one-byte ack into the request's in-buffers, push it back and notify the
 * guest.
 *
 * A request that lacks room for the header or the ack is treated as fatal
 * (error_report() followed by exit(1)).
 */
static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status;
    VirtQueueElement elem;
    size_t s;
    struct iovec *iov, *iov2;
    unsigned int iov_cnt;

    while (virtqueue_pop(vq, &elem)) {
        /*
         * Start every request from ERR.  Otherwise a request with an
         * unknown class would inherit whatever the previous request in
         * this batch returned and could be acked as OK.
         */
        status = VIRTIO_NET_ERR;

        if (iov_size(elem.in_sg, elem.in_num) < sizeof(status) ||
            iov_size(elem.out_sg, elem.out_num) < sizeof(ctrl)) {
            error_report("virtio-net ctrl missing headers");
            exit(1);
        }

        iov_cnt = elem.out_num;
        /* Work on a copy: iov_discard_front() modifies the vector it is
         * given.  iov2 keeps the allocation's start for g_free(). */
        iov2 = iov = g_memdup(elem.out_sg, sizeof(struct iovec) * elem.out_num);
        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
        if (s != sizeof(ctrl)) {
            status = VIRTIO_NET_ERR;
        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
            status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
            status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
        }

        s = iov_from_buf(elem.in_sg, elem.in_num, 0, &status, sizeof(status));
        assert(s == sizeof(status));

        virtqueue_push(vq, &elem, sizeof(status));
        virtio_notify(vdev, vq);
        g_free(iov2);
    }
}

A
aliguori 已提交
862 863 864 865
/* RX */

/*
 * RX virtqueue handler: flush the packets queued for the net client that
 * corresponds to @vq.
 */
static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int pair = vq2q(virtio_get_queue_index(vq));
    NetClientState *nc = qemu_get_subqueue(n->nic, pair);

    qemu_flush_queued_packets(nc);
}

872
/*
 * Return 1 when @nc may receive a packet: the VM is running, the queue is
 * within the active set, its RX virtqueue is ready and the driver has set
 * DRIVER_OK.  Return 0 otherwise.
 */
static int virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running || nc->queue_index >= n->curr_queues) {
        return 0;
    }

    if (virtio_queue_ready(q->rx_vq) &&
        (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return 1;
    }

    return 0;
}

894
/* True when the RX ring is empty or, with mergeable buffers, cannot hold
 * @bufsize bytes. */
static int virtio_net_rx_ring_short(VirtIONetQueue *q, int bufsize)
{
    return virtio_queue_empty(q->rx_vq) ||
           (q->n->mergeable_rx_bufs &&
            !virtqueue_avail_bytes(q->rx_vq, bufsize, 0));
}

/*
 * Return 1 when the RX virtqueue can take a packet of @bufsize bytes
 * (guest notification is then turned off), 0 otherwise (guest
 * notification is left on so new buffers are signalled).
 */
static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    if (virtio_net_rx_ring_short(q, bufsize)) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_net_rx_ring_short(q, bufsize)) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}

917
/* Apply virtio_tswap16s() to each 16-bit field of @hdr, in place. */
static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    /* checksum fields */
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);
    /* segmentation fields */
    virtio_tswap16s(vdev, &hdr->hdr_len);
    virtio_tswap16s(vdev, &hdr->gso_size);
}

A
Anthony Liguori 已提交
925 926 927 928 929 930 931 932 933 934 935 936 937 938 939
/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
M
Michael S. Tsirkin 已提交
940
                                        uint8_t *buf, size_t size)
A
Anthony Liguori 已提交
941 942 943 944 945 946
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
M
Michael S. Tsirkin 已提交
947
        net_checksum_calculate(buf, size);
A
Anthony Liguori 已提交
948 949 950 951
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}

952 953
/*
 * Write the virtio-net header for an incoming packet into the guest
 * buffers described by @iov / @iov_cnt.
 *
 * When the device has a vnet header (n->has_vnet_hdr), @buf is expected to
 * start with that header: it is fixed up in place (dhclient checksum
 * workaround, then virtio_net_hdr_swap()) and copied out.  Otherwise a
 * header with flags 0 and GSO type NONE is written.
 *
 * @size is the number of bytes at @buf, including n->host_hdr_len.
 */
static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        /* Use a byte pointer: arithmetic on void * is a GNU extension. */
        work_around_broken_dhclient(wbuf, (uint8_t *)wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);
        virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}

971 972 973
/*
 * Decide whether an incoming frame passes the device's RX filters.
 *
 * @buf points at the packet as received from the peer; the Ethernet frame
 * starts n->host_hdr_len bytes into it.  Returns 1 to accept the frame and
 * 0 to drop it.
 *
 * NOTE(review): @size is not consulted here, so the caller is trusted to
 * pass a frame long enough for the header bytes inspected below.
 */
static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    const uint8_t *ptr = buf;
    int i;

    if (n->promisc) {
        return 1;
    }

    ptr += n->host_hdr_len;

    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        /*
         * Assemble the big-endian 16-bit field at offset 14 byte by byte:
         * ptr + 14 is not guaranteed to be 2-byte aligned, so it must not
         * be dereferenced as a uint16_t.  The low 12 bits are the VLAN id.
         */
        int vid = ((ptr[14] << 8) | ptr[15]) & 0xfff;
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f)))) {
            return 0;
        }
    }

    if (ptr[0] & 1) { /* multicast */
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        /* Multicast entries occupy [first_multi, in_use) of the table. */
        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { /* unicast */
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        /* Unicast entries occupy [0, first_multi) of the table. */
        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}

1022
/*
 * NetClientInfo .receive callback: copy one packet from the peer into the
 * guest's RX virtqueue.
 *
 * Returns:
 *   -1    if the device cannot receive right now, or no element could be
 *         popped for the first buffer;
 *    0    if virtio_net_has_buffers() reports not enough guest buffers;
 *   size  if the packet was delivered, filtered out, or dropped because it
 *         did not fit into a single buffer without mergeable RX buffers.
 *
 * Exits the process if the queue unexpectedly runs empty after the first
 * buffer was filled, or if a popped element has no in-buffers.
 */
static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size)) {
        return size;
    }

    /* offset: bytes of buf consumed so far; i: number of elements filled. */
    offset = i = 0;

    while (offset < size) {
        VirtQueueElement elem;
        int len, total;
        const struct iovec *sg = elem.in_sg;

        total = 0;

        if (virtqueue_pop(q->rx_vq, &elem) == 0) {
            if (i == 0) {
                return -1;
            }
            error_report("virtio-net unexpected empty queue: "
                         "i %zd mergeable %d offset %zd, size %zd, "
                         "guest hdr len %zd, host hdr len %zd "
                         "guest features 0x%" PRIx64,
                         i, n->mergeable_rx_bufs, offset, size,
                         n->guest_hdr_len, n->host_hdr_len,
                         vdev->guest_features);
            exit(1);
        }

        if (elem.in_num < 1) {
            error_report("virtio-net receive queue contains no in buffers");
            exit(1);
        }

        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                /*
                 * Remember where num_buffers lives in the first element so
                 * it can be patched once the final buffer count is known.
                 */
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem.in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem.in_num, buf, size);
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem.in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
            virtqueue_discard(q->rx_vq, &elem, total);
            return size;
        }

        /* signal other side */
        virtqueue_fill(q->rx_vq, &elem, total, i++);
    }

    if (mhdr_cnt) {
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;
}

1118
static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
1119

1120
/*
 * Completion callback passed to qemu_sendv_packet_async() by
 * virtio_net_flush_tx().
 *
 * Pushes the element saved in q->async_tx back to the guest, clears
 * async_tx.elem.out_num (the "send in flight" marker tested by
 * virtio_net_flush_tx()), re-enables TX notifications and resumes flushing.
 * @len is not used.
 */
static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    virtqueue_push(q->tx_vq, &q->async_tx.elem, 0);
    virtio_notify(vdev, q->tx_vq);

    q->async_tx.elem.out_num = 0;

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}

A
aliguori 已提交
1135
/* TX */
1136
/*
 * Drain the TX virtqueue of @q, sending up to n->tx_burst packets to the
 * peer.
 *
 * Returns the number of packets handled, 0 if the driver is not ready or
 * an asynchronous send is still in flight, or -EBUSY if the peer could not
 * take a packet; in that case the element is parked in q->async_tx and
 * virtio_net_tx_complete() finishes it later.
 *
 * Exits the process on a malformed element (no out buffers, or a vnet
 * header shorter than n->guest_hdr_len).
 */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));

    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    /* A previous packet is still being sent asynchronously. */
    if (q->async_tx.elem.out_num) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }

    while (virtqueue_pop(q->tx_vq, &elem)) {
        ssize_t ret;
        unsigned int out_num = elem.out_num;
        struct iovec *out_sg = &elem.out_sg[0];
        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1];
        struct virtio_net_hdr_mrg_rxbuf mhdr;

        if (out_num < 1) {
            error_report("virtio-net header not in first element");
            exit(1);
        }

        if (n->has_vnet_hdr) {
            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
                n->guest_hdr_len) {
                error_report("virtio-net header incorrect");
                exit(1);
            }
            if (virtio_needs_swap(vdev)) {
                /*
                 * Send a byte-swapped private copy of the header in sg2[0],
                 * followed by the payload part of the guest's iovec.
                 */
                virtio_net_hdr_swap(vdev, (void *) &mhdr);
                sg2[0].iov_base = &mhdr;
                sg2[0].iov_len = n->guest_hdr_len;
                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
                                   out_sg, out_num,
                                   n->guest_hdr_len, -1);
                if (out_num == VIRTQUEUE_MAX_SIZE) {
                    goto drop;
                }
                out_num += 1;
                out_sg = sg2;
            }
        }
        /*
         * If host wants to see the guest header as is, we can
         * pass it on unchanged. Otherwise, copy just the parts
         * that host is interested in.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                                       out_sg, out_num,
                                       0, n->host_hdr_len);
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                             out_sg, out_num,
                             n->guest_hdr_len, -1);
            out_num = sg_num;
            out_sg = sg;
        }

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            /* Peer is busy: park the element until tx_complete runs. */
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            return -EBUSY;
        }

drop:
        virtqueue_push(q->tx_vq, &elem, 0);
        virtio_notify(vdev, q->tx_vq);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;
}

1220
/*
 * TX virtqueue handler used when the queue was created in "timer" mode.
 *
 * On the first notification the TX timer is armed for n->tx_timeout
 * nanoseconds and further notifications are disabled.  If a notification
 * arrives while the timer is already pending, the timer is cancelled and
 * the queue is flushed immediately.
 */
static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        q->tx_waiting = 1;
        return;
    }

    if (q->tx_waiting) {
        virtio_queue_set_notification(vq, 1);
        timer_del(q->tx_timer);
        q->tx_waiting = 0;
        virtio_net_flush_tx(q);
    } else {
        timer_mod(q->tx_timer,
                       qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        q->tx_waiting = 1;
        virtio_queue_set_notification(vq, 0);
    }
}

1244 1245
/*
 * TX virtqueue handler used when the queue was created in bottom-half
 * mode.
 *
 * Marks the queue as waiting and, if the VM is running, disables further
 * notifications and schedules q->tx_bh.  Does nothing if the queue is
 * already marked as waiting.
 */
static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely(q->tx_waiting)) {
        return;
    }
    q->tx_waiting = 1;
    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        return;
    }
    virtio_queue_set_notification(vq, 0);
    qemu_bh_schedule(q->tx_bh);
}

A
aliguori 已提交
1261 1262
static void virtio_net_tx_timer(void *opaque)
{
1263 1264
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
1265
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1266 1267 1268 1269 1270 1271
    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }
A
aliguori 已提交
1272

1273
    q->tx_waiting = 0;
A
aliguori 已提交
1274 1275

    /* Just in case the driver is not ready on more */
1276
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
A
aliguori 已提交
1277
        return;
1278
    }
A
aliguori 已提交
1279

1280 1281
    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
A
aliguori 已提交
1282 1283
}

1284 1285
static void virtio_net_tx_bh(void *opaque)
{
1286 1287
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
1288
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1289 1290
    int32_t ret;

1291 1292 1293 1294 1295 1296
    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }
1297

1298
    q->tx_waiting = 0;
1299 1300

    /* Just in case the driver is not ready on more */
1301
    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
1302
        return;
1303
    }
1304

1305
    ret = virtio_net_flush_tx(q);
1306 1307 1308 1309 1310 1311 1312
    if (ret == -EBUSY) {
        return; /* Notification re-enable handled by tx_complete */
    }

    /* If we flush a full burst of packets, assume there are
     * more coming and immediately reschedule */
    if (ret >= n->tx_burst) {
1313 1314
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
1315 1316 1317 1318 1319 1320
        return;
    }

    /* If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking.  If
     * we find something, assume the guest is still active and reschedule */
1321 1322 1323 1324 1325
    virtio_queue_set_notification(q->tx_vq, 1);
    if (virtio_net_flush_tx(q) > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
1326 1327 1328
    }
}

1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402
/*
 * Create the RX/TX virtqueue pair for queue pair @index.
 *
 * The TX side is driven by a timer when the "tx" option is "timer", and by
 * a bottom half in every other case.  Both virtqueues hold 256 entries.
 */
static void virtio_net_add_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *pair = &n->vqs[index];
    const char *tx_mode = n->net_conf.tx;

    pair->rx_vq = virtio_add_queue(vdev, 256, virtio_net_handle_rx);

    if (tx_mode && strcmp(tx_mode, "timer") == 0) {
        pair->tx_vq = virtio_add_queue(vdev, 256, virtio_net_handle_tx_timer);
        pair->tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
                                      virtio_net_tx_timer, pair);
    } else {
        pair->tx_vq = virtio_add_queue(vdev, 256, virtio_net_handle_tx_bh);
        pair->tx_bh = qemu_bh_new(virtio_net_tx_bh, pair);
    }

    pair->tx_waiting = 0;
    pair->n = n;
}

/*
 * Tear down the RX/TX virtqueue pair for queue pair @index.
 *
 * Purges packets still queued for the matching subqueue, deletes both
 * virtqueues and releases whichever TX deferral object (timer or bottom
 * half) the pair was created with.  The released handle is reset to NULL
 * so the queue structure never holds a dangling pointer.
 */
static void virtio_net_del_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = &n->vqs[index];
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    qemu_purge_queued_packets(nc);

    virtio_del_queue(vdev, index * 2);
    if (q->tx_timer) {
        timer_del(q->tx_timer);
        timer_free(q->tx_timer);
        q->tx_timer = NULL;
    } else {
        qemu_bh_delete(q->tx_bh);
        q->tx_bh = NULL;
    }
    virtio_del_queue(vdev, index * 2 + 1);
}

/*
 * Resize the device to @new_max_queues RX/TX pairs plus the control queue.
 *
 * The control virtqueue is always the last one, so whenever the total
 * changes it is removed first, the pairs are then trimmed or extended, and
 * the control virtqueue is added back at the end.
 */
static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int old_total = virtio_get_num_queues(vdev);
    int new_total = new_max_queues * 2 + 1;
    int vq;

    assert(old_total >= 3);
    assert(old_total % 2 == 1);

    if (new_total == old_total) {
        return;
    }

    /* The control vq sits at the highest index; drop it before resizing. */
    virtio_del_queue(vdev, old_total - 1);

    if (new_total < old_total) {
        /* Shrinking: delete the surplus pairs. */
        for (vq = new_total - 1; vq < old_total - 1; vq += 2) {
            virtio_net_del_queue(n, vq / 2);
        }
    } else {
        /* Growing: create the missing pairs. */
        for (vq = old_total - 1; vq < new_total - 1; vq += 2) {
            virtio_net_add_queue(n, vq / 2);
        }
    }

    /* Re-create the control vq as the last queue. */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
}

J
Jason Wang 已提交
1403
/*
 * Switch multiqueue mode on or off.
 *
 * Records @multiqueue, resizes the device to n->max_queues pairs (or a
 * single pair when multiqueue is off) and then calls
 * virtio_net_set_queues().
 */
static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
{
    int max = multiqueue ? n->max_queues : 1;

    n->multiqueue = multiqueue;
    virtio_net_change_num_queues(n, max);

    virtio_net_set_queues(n);
}

A
aliguori 已提交
1413 1414 1415
/*
 * savevm "save" handler registered in virtio_net_device_realize();
 * @opaque is the VirtIONet.  Delegates to virtio_save().
 */
static void virtio_net_save(QEMUFile *f, void *opaque)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /* At this point, backend must be stopped, otherwise
     * it might keep writing to memory. */
    assert(!n->vhost_started);
    virtio_save(vdev, f);
}

/*
 * Write the virtio-net specific device state to @f.
 *
 * The field order and widths form the migration stream format and must
 * stay in step with virtio_net_load_device().
 */
static void virtio_net_save_device(VirtIODevice *vdev, QEMUFile *f)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    qemu_put_buffer(f, n->mac, ETH_ALEN);
    qemu_put_be32(f, n->vqs[0].tx_waiting);
    qemu_put_be32(f, n->mergeable_rx_bufs);
    qemu_put_be16(f, n->status);
    qemu_put_byte(f, n->promisc);
    qemu_put_byte(f, n->allmulti);
    qemu_put_be32(f, n->mac_table.in_use);
    qemu_put_buffer(f, n->mac_table.macs, n->mac_table.in_use * ETH_ALEN);
    qemu_put_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);
    qemu_put_be32(f, n->has_vnet_hdr);
    qemu_put_byte(f, n->mac_table.multi_overflow);
    qemu_put_byte(f, n->mac_table.uni_overflow);
    qemu_put_byte(f, n->alluni);
    qemu_put_byte(f, n->nomulti);
    qemu_put_byte(f, n->nouni);
    qemu_put_byte(f, n->nobcast);
    qemu_put_byte(f, n->has_ufo);
    /* Per-queue state is only present for multiqueue-capable devices. */
    if (n->max_queues > 1) {
        qemu_put_be16(f, n->max_queues);
        qemu_put_be16(f, n->curr_queues);
        for (i = 1; i < n->curr_queues; i++) {
            qemu_put_be32(f, n->vqs[i].tx_waiting);
        }
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        qemu_put_be64(f, n->curr_guest_offloads);
    }
}

/*
 * savevm "load" handler registered in virtio_net_device_realize();
 * @opaque is the VirtIONet.
 *
 * Returns -EINVAL for an unsupported @version_id, the error from
 * virtio_load() if that fails, and 0 on success.
 */
static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int ret;

    if (version_id < 2 || version_id > VIRTIO_NET_VM_VERSION) {
        return -EINVAL;
    }

    ret = virtio_load(vdev, f, version_id);
    if (ret) {
        return ret;
    }

    /* Mirrors the trailing optional field in virtio_net_save_device(). */
    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        n->curr_guest_offloads = qemu_get_be64(f);
    } else {
        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
    }

    if (peer_has_vnet_hdr(n)) {
        virtio_net_apply_guest_offloads(n);
    }

    /* Arm the announce timer to fire immediately if the guest supports it. */
    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        n->announce_counter = SELF_ANNOUNCE_ROUNDS;
        timer_mod(n->announce_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL));
    }

    return 0;
}

/*
 * Read the virtio-net specific device state from @f.
 *
 * Fields are consumed in the order virtio_net_save_device() writes them;
 * fields introduced by later stream versions are read only when
 * @version_id is new enough.
 *
 * Returns 0 on success and -1 if the saved image requires a peer feature
 * that is missing (vnet_hdr, UFO) or has inconsistent queue counts.
 */
static int virtio_net_load_device(VirtIODevice *vdev, QEMUFile *f,
                                  int version_id)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i, link_down;

    qemu_get_buffer(f, n->mac, ETH_ALEN);
    n->vqs[0].tx_waiting = qemu_get_be32(f);

    virtio_net_set_mrg_rx_bufs(n, qemu_get_be32(f),
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_F_VERSION_1));

    if (version_id >= 3) {
        n->status = qemu_get_be16(f);
    }

    if (version_id >= 4) {
        /* promisc/allmulti shrank from 32 bits to one byte in version 8. */
        if (version_id < 8) {
            n->promisc = qemu_get_be32(f);
            n->allmulti = qemu_get_be32(f);
        } else {
            n->promisc = qemu_get_byte(f);
            n->allmulti = qemu_get_byte(f);
        }
    }

    if (version_id >= 5) {
        n->mac_table.in_use = qemu_get_be32(f);
        /* MAC_TABLE_ENTRIES may be different from the saved image */
        if (n->mac_table.in_use <= MAC_TABLE_ENTRIES) {
            qemu_get_buffer(f, n->mac_table.macs,
                            n->mac_table.in_use * ETH_ALEN);
        } else {
            int64_t skipped;

            /* Overflow detected - can happen if source has a larger MAC table.
             * We simply set overflow flag so there's no need to maintain the
             * table of addresses, discard them all.
             * Note: 64 bit math to avoid integer overflow.
             */
            for (skipped = 0;
                 skipped < (int64_t)n->mac_table.in_use * ETH_ALEN;
                 ++skipped) {
                qemu_get_byte(f);
            }
            n->mac_table.multi_overflow = n->mac_table.uni_overflow = 1;
            n->mac_table.in_use = 0;
        }
    }

    if (version_id >= 6) {
        qemu_get_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);
    }

    if (version_id >= 7) {
        if (qemu_get_be32(f) && !peer_has_vnet_hdr(n)) {
            error_report("virtio-net: saved image requires vnet_hdr=on");
            return -1;
        }
    }

    if (version_id >= 9) {
        n->mac_table.multi_overflow = qemu_get_byte(f);
        n->mac_table.uni_overflow = qemu_get_byte(f);
    }

    if (version_id >= 10) {
        n->alluni = qemu_get_byte(f);
        n->nomulti = qemu_get_byte(f);
        n->nouni = qemu_get_byte(f);
        n->nobcast = qemu_get_byte(f);
    }

    if (version_id >= 11) {
        if (qemu_get_byte(f) && !peer_has_ufo(n)) {
            error_report("virtio-net: saved image requires TUN_F_UFO support");
            return -1;
        }
    }

    if (n->max_queues > 1) {
        if (n->max_queues != qemu_get_be16(f)) {
            error_report("virtio-net: different max_queues ");
            return -1;
        }

        n->curr_queues = qemu_get_be16(f);
        if (n->curr_queues > n->max_queues) {
            error_report("virtio-net: curr_queues %x > max_queues %x",
                         n->curr_queues, n->max_queues);
            return -1;
        }
        for (i = 1; i < n->curr_queues; i++) {
            n->vqs[i].tx_waiting = qemu_get_be32(f);
        }
    }

    virtio_net_set_queues(n);

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in n->status */
    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
    for (i = 0; i < n->max_queues; i++) {
        qemu_get_subqueue(n->nic, i)->link_down = link_down;
    }

    return 0;
}

M
Mark McLoughlin 已提交
1606
static NetClientInfo net_virtio_info = {
1607
    .type = NET_CLIENT_OPTIONS_KIND_NIC,
M
Mark McLoughlin 已提交
1608 1609 1610 1611
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
1612
    .query_rx_filter = virtio_net_query_rxfilter,
M
Mark McLoughlin 已提交
1613 1614
};

1615 1616
/*
 * Ask the vhost backend whether virtqueue @idx has a pending guest
 * notification.  Must only be called while vhost is started.
 */
static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
    assert(n->vhost_started);
    return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
}

/*
 * Forward a mask/unmask request for the guest notifier of virtqueue @idx
 * to the vhost backend.  Must only be called while vhost is started.
 */
static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
                                           bool mask)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
    assert(n->vhost_started);
    vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
                             vdev, idx, mask);
}

G
Gerd Hoffmann 已提交
1633
/*
 * Set n->config_size to the end offset of the last config field required
 * by @host_features, as listed in feature_sizes[].  VIRTIO_NET_F_MAC is
 * always treated as present, so the MAC field is always covered.
 */
static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
{
    int i, config_size = 0;
    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
    /* feature_sizes[] is terminated by an entry whose flags are 0. */
    for (i = 0; feature_sizes[i].flags != 0; i++) {
        if (host_features & feature_sizes[i].flags) {
            config_size = MAX(feature_sizes[i].end, config_size);
        }
    }
    n->config_size = config_size;
}

1645 1646 1647 1648 1649 1650 1651 1652
/*
 * Override the net client name and type used when the NIC is created in
 * virtio_net_device_realize().
 *
 * Both strings are duplicated; any previously stored values are freed.
 * @type must not be NULL.
 */
void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
                                   const char *type)
{
    /*
     * The name can be NULL, the netclient name will be type.x.
     */
    assert(type != NULL);

    g_free(n->netclient_name);
    g_free(n->netclient_type);
    n->netclient_name = g_strdup(name);
    n->netclient_type = g_strdup(type);
}

1659
/*
 * Realize the virtio-net device: initialise the virtio core, create the
 * RX/TX queue pairs and the control queue, create the NIC backend, set up
 * default filter state and register the savevm handlers.
 *
 * On an invalid queue count an error is stored in @errp and the virtio
 * core is cleaned up again.
 */
static void virtio_net_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    NetClientState *nc;
    int i;

    virtio_net_set_config_size(n, n->host_features);
    virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);

    /* Each queue pair needs two virtqueues, plus one for the control vq. */
    n->max_queues = MAX(n->nic_conf.peers.queues, 1);
    if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
        error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
                   "must be a positive integer less than %d.",
                   n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
        virtio_cleanup(vdev);
        return;
    }
    n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
    n->curr_queues = 1;
    n->tx_timeout = n->net_conf.txtimer;

    /* An unknown tx= value is only reported; "bh" is then used. */
    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
                       && strcmp(n->net_conf.tx, "bh")) {
        error_report("virtio-net: "
                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                     n->net_conf.tx);
        error_report("Defaulting to \"bh\"");
    }

    for (i = 0; i < n->max_queues; i++) {
        virtio_net_add_queue(n, i);
    }

    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;
    n->announce_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
                                     virtio_net_announce_timer, n);

    if (n->netclient_type) {
        /*
         * Happen when virtio_net_set_netclient_name has been called.
         */
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              n->netclient_type, n->netclient_name, n);
    } else {
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              object_get_typename(OBJECT(dev)), dev->id, n);
    }

    peer_test_vnet_hdr(n);
    if (peer_has_vnet_hdr(n)) {
        for (i = 0; i < n->max_queues; i++) {
            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
        }
        n->host_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        n->host_hdr_len = 0;
    }

    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);

    n->vqs[0].tx_waiting = 0;
    n->tx_burst = n->net_conf.txburst;
    virtio_net_set_mrg_rx_bufs(n, 0, 0);
    n->promisc = 1; /* for compatibility */

    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    /* One bit per VLAN id. */
    n->vlans = g_malloc0(MAX_VLAN >> 3);

    nc = qemu_get_queue(n->nic);
    nc->rxfilter_notify_enabled = 1;

    n->qdev = dev;
    register_savevm(dev, "virtio-net", -1, VIRTIO_NET_VM_VERSION,
                    virtio_net_save, virtio_net_load, n);
}

1740
/*
 * Undo virtio_net_device_realize(): stop the backend, unregister the
 * savevm handlers, free the filter tables and name strings, delete the
 * queue pairs, the announce timer and the NIC, and clean up the virtio
 * core.
 */
static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    int i, max_queues;

    /* This will stop vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    unregister_savevm(dev, "virtio-net", n);

    g_free(n->netclient_name);
    n->netclient_name = NULL;
    g_free(n->netclient_type);
    n->netclient_type = NULL;

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    /* Only one pair is deleted unless multiqueue mode is on. */
    max_queues = n->multiqueue ? n->max_queues : 1;
    for (i = 0; i < max_queues; i++) {
        virtio_net_del_queue(n, i);
    }

    timer_del(n->announce_timer);
    timer_free(n->announce_timer);
    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_cleanup(vdev);
}

/*
 * Instance initializer for TYPE_VIRTIO_NET objects (installed as
 * .instance_init in virtio_net_info).
 *
 * Sets the default config space size and adds the "bootindex" property,
 * which is backed by n->nic_conf.bootindex.
 *
 * @obj: the object being initialized, viewed as a VirtIONet.
 */
static void virtio_net_instance_init(Object *obj)
{
    VirtIONet *n = VIRTIO_NET(obj);

    /*
     * The default config_size is sizeof(struct virtio_net_config).
     * Can be overridden with virtio_net_set_config_size.
     */
    n->config_size = sizeof(struct virtio_net_config);
    device_add_bootindex_property(obj, &n->nic_conf.bootindex,
                                  "bootindex", "/ethernet-phy@0",
                                  DEVICE(n), NULL);
}

static Property virtio_net_properties[] = {
1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824
    DEFINE_PROP_BIT("csum", VirtIONet, host_features, VIRTIO_NET_F_CSUM, true),
    DEFINE_PROP_BIT("guest_csum", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_CSUM, true),
    DEFINE_PROP_BIT("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
    DEFINE_PROP_BIT("guest_tso4", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_TSO4, true),
    DEFINE_PROP_BIT("guest_tso6", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_TSO6, true),
    DEFINE_PROP_BIT("guest_ecn", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_ECN, true),
    DEFINE_PROP_BIT("guest_ufo", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_UFO, true),
    DEFINE_PROP_BIT("guest_announce", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_ANNOUNCE, true),
    DEFINE_PROP_BIT("host_tso4", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_TSO4, true),
    DEFINE_PROP_BIT("host_tso6", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_TSO6, true),
    DEFINE_PROP_BIT("host_ecn", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_ECN, true),
    DEFINE_PROP_BIT("host_ufo", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_UFO, true),
    DEFINE_PROP_BIT("mrg_rxbuf", VirtIONet, host_features,
                    VIRTIO_NET_F_MRG_RXBUF, true),
    DEFINE_PROP_BIT("status", VirtIONet, host_features,
                    VIRTIO_NET_F_STATUS, true),
    DEFINE_PROP_BIT("ctrl_vq", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_VQ, true),
    DEFINE_PROP_BIT("ctrl_rx", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_RX, true),
    DEFINE_PROP_BIT("ctrl_vlan", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_VLAN, true),
    DEFINE_PROP_BIT("ctrl_rx_extra", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_RX_EXTRA, true),
    DEFINE_PROP_BIT("ctrl_mac_addr", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_MAC_ADDR, true),
    DEFINE_PROP_BIT("ctrl_guest_offloads", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
    DEFINE_PROP_BIT("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
1825 1826
    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
1827
                       TX_TIMER_INTERVAL),
1828 1829 1830 1831 1832 1833 1834 1835 1836
    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
    DEFINE_PROP_END_OF_LIST(),
};

/*
 * Class initializer for TYPE_VIRTIO_NET (installed as .class_init in
 * virtio_net_info).
 *
 * Attaches the property table, marks the device as belonging to the
 * network category and fills in the VirtioDeviceClass callbacks with the
 * virtio-net implementations.
 *
 * @klass: the class being initialized, viewed both as DeviceClass and as
 *         VirtioDeviceClass.
 * @data:  not used by this function.
 */
static void virtio_net_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);

    dc->props = virtio_net_properties;
    set_bit(DEVICE_CATEGORY_NETWORK, dc->categories);

    /* Device lifecycle. */
    vdc->realize = virtio_net_device_realize;
    vdc->unrealize = virtio_net_device_unrealize;

    /* Config space, feature bits, reset and status handling. */
    vdc->get_config = virtio_net_get_config;
    vdc->set_config = virtio_net_set_config;
    vdc->get_features = virtio_net_get_features;
    vdc->set_features = virtio_net_set_features;
    vdc->bad_features = virtio_net_bad_features;
    vdc->reset = virtio_net_reset;
    vdc->set_status = virtio_net_set_status;

    /* Guest notifier hooks. */
    vdc->guest_notifier_mask = virtio_net_guest_notifier_mask;
    vdc->guest_notifier_pending = virtio_net_guest_notifier_pending;

    /* Device state load/save hooks. */
    vdc->load = virtio_net_load_device;
    vdc->save = virtio_net_save_device;
}

/*
 * Type description for the virtio-net device: a TYPE_VIRTIO_DEVICE child
 * whose instances are VirtIONet-sized and which is set up through the
 * class and instance initializers defined above.
 */
static const TypeInfo virtio_net_info = {
    .name          = TYPE_VIRTIO_NET,
    .parent        = TYPE_VIRTIO_DEVICE,
    .class_init    = virtio_net_class_init,
    .instance_init = virtio_net_instance_init,
    .instance_size = sizeof(VirtIONet),
};

/* Register the virtio-net type description (virtio_net_info). */
static void virtio_register_types(void)
{
    type_register_static(&virtio_net_info);
}

/*
 * Hand the registration function to type_init(); the macro is defined
 * outside this file -- presumably it arranges for the function to be run
 * during start-up type initialization (confirm against its definition).
 */
type_init(virtio_register_types)