virtio-net.c 56.1 KB
Newer Older
A
aliguori 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
/*
 * Virtio Network Device
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

14
#include "qemu/iov.h"
P
Paolo Bonzini 已提交
15
#include "hw/virtio/virtio.h"
P
Paolo Bonzini 已提交
16
#include "net/net.h"
17
#include "net/checksum.h"
18
#include "net/tap.h"
19 20
#include "qemu/error-report.h"
#include "qemu/timer.h"
P
Paolo Bonzini 已提交
21 22
#include "hw/virtio/virtio-net.h"
#include "net/vhost_net.h"
23
#include "hw/virtio/virtio-bus.h"
24
#include "qapi/qmp/qjson.h"
25
#include "qapi-event.h"
26
#include "hw/virtio/virtio-access.h"
A
aliguori 已提交
27

28
#define VIRTIO_NET_VM_VERSION    11
29

30
#define MAC_TABLE_ENTRIES    64
31
#define MAX_VLAN    (1 << 12)   /* Per 802.1Q definition */
32

33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
/*
 * Calculate the number of bytes up to and including the given 'field' of
 * 'container'.
 */
#define endof(container, field) \
    (offsetof(container, field) + sizeof(((container *)0)->field))

/*
 * Pairs a feature bit mask ('flags') with a byte count ('end'): the size of
 * the configuration structure up to and including the field that belongs to
 * that feature.
 */
typedef struct VirtIOFeature {
    uint32_t flags;
    size_t end;
} VirtIOFeature;

/*
 * End offsets inside struct virtio_net_config for the MAC, STATUS and MQ
 * features.  The table is terminated by an all-zero entry.
 * NOTE(review): the consumer of this table is not visible in this chunk;
 * presumably it derives the config size from the enabled features - confirm.
 */
static VirtIOFeature feature_sizes[] = {
    {.flags = 1 << VIRTIO_NET_F_MAC,
     .end = endof(struct virtio_net_config, mac)},
    {.flags = 1 << VIRTIO_NET_F_STATUS,
     .end = endof(struct virtio_net_config, status)},
    {.flags = 1 << VIRTIO_NET_F_MQ,
     .end = endof(struct virtio_net_config, max_virtqueue_pairs)},
    {}
};

J
Jason Wang 已提交
55
/*
 * Return the per-queue virtio-net state that corresponds to net client 'nc',
 * selected by the client's queue index.
 */
static VirtIONetQueue *virtio_net_get_subqueue(NetClientState *nc)
{
    VirtIONet *dev = qemu_get_nic_opaque(nc);

    return dev->vqs + nc->queue_index;
}
J
Jason Wang 已提交
61 62 63 64 65 66

/*
 * Convert a virtqueue index into a net subqueue index: every two consecutive
 * virtqueue indexes map to the same result (integer division by two).
 */
static int vq2q(int queue_index)
{
    const int vqs_per_queue = 2;

    return queue_index / vqs_per_queue;
}

A
aliguori 已提交
67 68 69 70
/* TODO
 * - we could suppress RX interrupt if we were so inclined.
 */

71
/*
 * Fill the guest-visible configuration space.
 *
 * A local struct virtio_net_config is populated with the link status, the
 * maximum number of queues and the MAC address; its first n->config_size
 * bytes are then copied into 'config'.
 */
static void virtio_net_get_config(VirtIODevice *vdev, uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    /*
     * Start from an all-zero structure (as virtio_net_set_config() does) so
     * that any byte not explicitly written below can never expose
     * indeterminate stack contents through the memcpy() to the guest.
     */
    struct virtio_net_config netcfg = {};

    virtio_stw_p(vdev, &netcfg.status, n->status);
    virtio_stw_p(vdev, &netcfg.max_virtqueue_pairs, n->max_queues);
    memcpy(netcfg.mac, n->mac, ETH_ALEN);
    memcpy(config, &netcfg, n->config_size);
}

82 83
/*
 * Handle a guest write to the configuration space.
 *
 * The first n->config_size bytes of 'config' are read into a zeroed local
 * copy.  The MAC address is taken from it only when neither
 * VIRTIO_NET_F_CTRL_MAC_ADDR nor VIRTIO_F_VERSION_1 is set on the device and
 * the written address differs from the current one.
 */
static void virtio_net_set_config(VirtIODevice *vdev, const uint8_t *config)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_config incoming = {};

    memcpy(&incoming, config, n->config_size);

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_MAC_ADDR) ||
        virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        return;
    }

    if (memcmp(incoming.mac, n->mac, ETH_ALEN) == 0) {
        /* Nothing changed. */
        return;
    }

    memcpy(n->mac, incoming.mac, ETH_ALEN);
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
}

97 98
/*
 * Report whether the device counts as started for the given virtio 'status':
 * DRIVER_OK must be set in 'status', the link must be up in n->status and
 * the VM must be running.
 */
static bool virtio_net_started(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    if (!(status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return false;
    }
    if (!(n->status & VIRTIO_NET_S_LINK_UP)) {
        return false;
    }
    return vdev->vm_running;
}

J
Jason Wang 已提交
104 105 106 107 108 109 110 111 112 113
/*
 * Announce timer callback ('opaque' is the VirtIONet): consume one pending
 * announce round, raise VIRTIO_NET_S_ANNOUNCE in the device status and send
 * a configuration-change notification.
 */
static void virtio_net_announce_timer(void *opaque)
{
    VirtIONet *n = opaque;

    n->status |= VIRTIO_NET_S_ANNOUNCE;
    n->announce_counter--;

    virtio_notify_config(VIRTIO_DEVICE(n));
}

114
/*
 * Start or stop the vhost backend so that it matches the desired state.
 *
 * Nothing is done when the peer has no vhost net, or when the backend is
 * already in the state implied by 'status' and the peer link state.  Before
 * starting vhost, queued packets are purged in both directions on every
 * queue that will be handed over.  If starting fails, an error is reported
 * and vhost_started is cleared again.
 */
static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    int queues = n->multiqueue ? n->max_queues : 1;
    bool want_running;
    int idx, ret;

    if (!get_vhost_net(nc->peer)) {
        return;
    }

    want_running = virtio_net_started(n, status) && !nc->peer->link_down;
    if (want_running == !!n->vhost_started) {
        return;
    }

    if (n->vhost_started) {
        vhost_net_stop(vdev, n->nic->ncs, queues);
        n->vhost_started = 0;
        return;
    }

    /* Any packets outstanding? Purge them to avoid touching rings
     * when vhost is running.
     */
    for (idx = 0; idx < queues; idx++) {
        NetClientState *qnc = qemu_get_subqueue(n->nic, idx);

        /* Purge both directions: TX and RX. */
        qemu_net_queue_purge(qnc->peer->incoming_queue, qnc);
        qemu_net_queue_purge(qnc->incoming_queue, qnc->peer);
    }

    n->vhost_started = 1;
    ret = vhost_net_start(vdev, n->nic->ncs, queues);
    if (ret < 0) {
        error_report("unable to start vhost net: %d: "
                     "falling back on userspace virtio", -ret);
        n->vhost_started = 0;
    }
}

155 156
static void virtio_net_set_status(struct VirtIODevice *vdev, uint8_t status)
{
157
    VirtIONet *n = VIRTIO_NET(vdev);
J
Jason Wang 已提交
158 159 160
    VirtIONetQueue *q;
    int i;
    uint8_t queue_status;
161 162 163

    virtio_net_vhost_status(n, status);

J
Jason Wang 已提交
164
    for (i = 0; i < n->max_queues; i++) {
165 166
        NetClientState *ncs = qemu_get_subqueue(n->nic, i);
        bool queue_started;
J
Jason Wang 已提交
167
        q = &n->vqs[i];
168

J
Jason Wang 已提交
169 170
        if ((!n->multiqueue && i != 0) || i >= n->curr_queues) {
            queue_status = 0;
171
        } else {
J
Jason Wang 已提交
172
            queue_status = status;
173
        }
174 175 176 177 178 179
        queue_started =
            virtio_net_started(n, queue_status) && !n->vhost_started;

        if (queue_started) {
            qemu_flush_queued_packets(ncs);
        }
J
Jason Wang 已提交
180 181 182 183 184

        if (!q->tx_waiting) {
            continue;
        }

185
        if (queue_started) {
J
Jason Wang 已提交
186
            if (q->tx_timer) {
187 188
                timer_mod(q->tx_timer,
                               qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
J
Jason Wang 已提交
189 190 191
            } else {
                qemu_bh_schedule(q->tx_bh);
            }
192
        } else {
J
Jason Wang 已提交
193
            if (q->tx_timer) {
194
                timer_del(q->tx_timer);
J
Jason Wang 已提交
195 196 197
            } else {
                qemu_bh_cancel(q->tx_bh);
            }
198 199 200 201
        }
    }
}

202
/*
 * Mirror the link state of net client 'nc' into VIRTIO_NET_S_LINK_UP,
 * notify the guest of a configuration change if the status word changed,
 * and re-apply the current virtio status.
 */
static void virtio_net_set_link_status(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    const uint16_t before = n->status;

    if (nc->link_down) {
        n->status &= ~VIRTIO_NET_S_LINK_UP;
    } else {
        n->status |= VIRTIO_NET_S_LINK_UP;
    }

    if (before != n->status) {
        virtio_notify_config(vdev);
    }

    virtio_net_set_status(vdev, vdev->status);
}

219 220 221 222 223
/*
 * Emit a NIC_RX_FILTER_CHANGED event for 'nc' if notifications are enabled,
 * then disable them again so that further changes do not produce more
 * events.
 */
static void rxfilter_notify(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    gchar *qom_path;

    if (!nc->rxfilter_notify_enabled) {
        return;
    }

    qom_path = object_get_canonical_path(OBJECT(n->qdev));
    qapi_event_send_nic_rx_filter_changed(!!n->netclient_name,
                                          n->netclient_name, qom_path,
                                          &error_abort);
    g_free(qom_path);

    /* disable event notification to avoid events flooding */
    nc->rxfilter_notify_enabled = 0;
}

234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253
/*
 * Build a newly allocated intList with one entry per bit set in the
 * n->vlans bitmap (32 ids per word).  Entries are prepended, so the list
 * comes out in descending id order.  Returns NULL if no bit is set.
 */
static intList *get_vlan_table(VirtIONet *n)
{
    intList *head = NULL;
    int word, bit;

    for (word = 0; word < MAX_VLAN >> 5; word++) {
        if (!n->vlans[word]) {
            /* No id set in this word. */
            continue;
        }

        for (bit = 0; bit <= 0x1f; bit++) {
            intList *node;

            if (!(n->vlans[word] & (1U << bit))) {
                continue;
            }

            node = g_malloc0(sizeof(*node));
            node->value = (word << 5) + bit;
            node->next = head;
            head = node;
        }
    }

    return head;
}

254 255 256
/*
 * Build an RxFilterInfo snapshot of the current receive filter state for
 * net client 'nc'.
 *
 * The returned structure is newly allocated and describes promiscuous mode,
 * unicast/multicast/vlan filter state, overflow flags, the main MAC address
 * and the unicast, multicast and vlan tables.  Table lists are built by
 * prepending, so they appear in reverse table order.
 *
 * As a side effect, rx-filter change notification is re-enabled on 'nc'.
 */
static RxFilterInfo *virtio_net_query_rxfilter(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    RxFilterInfo *info;
    strList *str_list, *entry;
    int i;

    info = g_malloc0(sizeof(*info));
    info->name = g_strdup(nc->name);
    info->promiscuous = n->promisc;

    if (n->nouni) {
        info->unicast = RX_STATE_NONE;
    } else if (n->alluni) {
        info->unicast = RX_STATE_ALL;
    } else {
        info->unicast = RX_STATE_NORMAL;
    }

    if (n->nomulti) {
        info->multicast = RX_STATE_NONE;
    } else if (n->allmulti) {
        info->multicast = RX_STATE_ALL;
    } else {
        info->multicast = RX_STATE_NORMAL;
    }

    /*
     * 'nobcast' is the "no broadcast" switch (set by
     * VIRTIO_NET_CTRL_RX_NOBCAST, cleared on reset), so broadcast is
     * allowed exactly when it is NOT set.
     */
    info->broadcast_allowed = !n->nobcast;
    info->multicast_overflow = n->mac_table.multi_overflow;
    info->unicast_overflow = n->mac_table.uni_overflow;

    info->main_mac = qemu_mac_strdup_printf(n->mac);

    /* Unicast entries occupy table slots [0, first_multi). */
    str_list = NULL;
    for (i = 0; i < n->mac_table.first_multi; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->unicast_table = str_list;

    /* Multicast entries occupy table slots [first_multi, in_use). */
    str_list = NULL;
    for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
        entry = g_malloc0(sizeof(*entry));
        entry->value = qemu_mac_strdup_printf(n->mac_table.macs + i * ETH_ALEN);
        entry->next = str_list;
        str_list = entry;
    }
    info->multicast_table = str_list;
    info->vlan_table = get_vlan_table(n);

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VLAN)) {
        info->vlan = RX_STATE_ALL;
    } else if (!info->vlan_table) {
        info->vlan = RX_STATE_NONE;
    } else {
        info->vlan = RX_STATE_NORMAL;
    }

    /* enable event notification after query */
    nc->rxfilter_notify_enabled = 1;

    return info;
}

321 322
/*
 * Device reset: return the receive mode flags, queue count, announce state,
 * MAC/VLAN filter tables and MAC address to their initial values.
 */
static void virtio_net_reset(VirtIODevice *vdev)
{
    VirtIONet *n = VIRTIO_NET(vdev);

    /* Reset back to compatibility mode */
    n->promisc = 1;
    n->allmulti = 0;
    n->alluni = 0;
    n->nomulti = 0;
    n->nouni = 0;
    n->nobcast = 0;

    /* multiqueue is disabled by default */
    n->curr_queues = 1;

    /* Drop any announcement in progress. */
    timer_del(n->announce_timer);
    n->announce_counter = 0;
    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    /* Flush any MAC and VLAN filter table state */
    n->mac_table.in_use = 0;
    n->mac_table.first_multi = 0;
    n->mac_table.multi_overflow = 0;
    n->mac_table.uni_overflow = 0;
    memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
    memset(n->vlans, 0, MAX_VLAN >> 3);

    /* Restore the configured MAC address and refresh the info string. */
    memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
}

349
/*
 * Record in n->has_vnet_hdr whether the peer of the first queue supports
 * the vnet header.  Leaves the field untouched when there is no peer.
 */
static void peer_test_vnet_hdr(VirtIONet *n)
{
    NetClientState *nc = qemu_get_queue(n->nic);

    if (nc->peer) {
        n->has_vnet_hdr = qemu_has_vnet_hdr(nc->peer);
    }
}
M
Mark McLoughlin 已提交
358

359 360
/* Return the cached vnet-header capability stored in n->has_vnet_hdr. */
static int peer_has_vnet_hdr(VirtIONet *n)
{
    int cached = n->has_vnet_hdr;

    return cached;
}

364 365 366 367 368
/*
 * Return non-zero if the peer supports UFO.  Without vnet header support
 * the answer is always 0; otherwise the peer is queried and the result is
 * also stored in n->has_ufo.
 */
static int peer_has_ufo(VirtIONet *n)
{
    if (peer_has_vnet_hdr(n)) {
        n->has_ufo = qemu_has_ufo(qemu_get_queue(n->nic)->peer);
        return n->has_ufo;
    }

    return 0;
}

374 375
/*
 * Record whether mergeable RX buffers are in use and derive the guest
 * header length: the mergeable-rxbuf header when 'version_1' or
 * 'mergeable_rx_bufs' is set, the plain virtio-net header otherwise.
 *
 * For every queue whose peer accepts that header length, the length is
 * applied to the peer and n->host_hdr_len is set to match.
 */
static void virtio_net_set_mrg_rx_bufs(VirtIONet *n, int mergeable_rx_bufs,
                                       int version_1)
{
    int idx;

    n->mergeable_rx_bufs = mergeable_rx_bufs;

    if (version_1 || n->mergeable_rx_bufs) {
        n->guest_hdr_len = sizeof(struct virtio_net_hdr_mrg_rxbuf);
    } else {
        n->guest_hdr_len = sizeof(struct virtio_net_hdr);
    }

    for (idx = 0; idx < n->max_queues; idx++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, idx);

        if (!peer_has_vnet_hdr(n)) {
            continue;
        }
        if (!qemu_has_vnet_hdr_len(nc->peer, n->guest_hdr_len)) {
            continue;
        }

        qemu_set_vnet_hdr_len(nc->peer, n->guest_hdr_len);
        n->host_hdr_len = n->guest_hdr_len;
    }
}

J
Jason Wang 已提交
401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433
/*
 * Enable the tap peer of queue 'index'.  Returns 0 without doing anything
 * when the queue has no peer or the peer is not a tap; otherwise returns
 * the result of tap_enable().
 */
static int peer_attach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer ||
        nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
        return 0;
    }

    return tap_enable(nc->peer);
}

/*
 * Disable the tap peer of queue 'index'.  Returns 0 without doing anything
 * when the queue has no peer or the peer is not a tap; otherwise returns
 * the result of tap_disable().
 */
static int peer_detach(VirtIONet *n, int index)
{
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    if (!nc->peer ||
        nc->peer->info->type != NET_CLIENT_OPTIONS_KIND_TAP) {
        return 0;
    }

    return tap_disable(nc->peer);
}

/*
 * Attach the peers of the first n->curr_queues queues and detach the peers
 * of the remaining ones.  Each operation is asserted to succeed.
 */
static void virtio_net_set_queues(VirtIONet *n)
{
    int idx;

    for (idx = 0; idx < n->max_queues; idx++) {
        int rc = idx < n->curr_queues ? peer_attach(n, idx)
                                      : peer_detach(n, idx);

        assert(!rc);
    }
}

J
Jason Wang 已提交
447
static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue);
J
Jason Wang 已提交
448

J
Jason Wang 已提交
449 450
/*
 * Compute the feature set offered to the guest.
 *
 * Starts from 'features' plus the device's host_features and the MAC
 * feature, removes the checksum/TSO/ECN offloads when the peer has no vnet
 * header, removes UFO when the peer lacks vnet header or UFO support, and
 * finally lets the vhost backend (if any) filter the result.
 * 'errp' is not used by this function.
 */
static uint64_t virtio_net_get_features(VirtIODevice *vdev, uint64_t features,
                                        Error **errp)
{
    /* Offloads that are cleared when the peer has no vnet header. */
    static const unsigned int vnet_hdr_offloads[] = {
        VIRTIO_NET_F_CSUM,
        VIRTIO_NET_F_HOST_TSO4,
        VIRTIO_NET_F_HOST_TSO6,
        VIRTIO_NET_F_HOST_ECN,
        VIRTIO_NET_F_GUEST_CSUM,
        VIRTIO_NET_F_GUEST_TSO4,
        VIRTIO_NET_F_GUEST_TSO6,
        VIRTIO_NET_F_GUEST_ECN,
    };
    VirtIONet *n = VIRTIO_NET(vdev);
    NetClientState *nc = qemu_get_queue(n->nic);
    size_t k;

    /* Firstly sync all virtio-net possible supported features */
    features |= n->host_features;

    virtio_add_feature(&features, VIRTIO_NET_F_MAC);

    if (!peer_has_vnet_hdr(n)) {
        for (k = 0;
             k < sizeof(vnet_hdr_offloads) / sizeof(vnet_hdr_offloads[0]);
             k++) {
            virtio_clear_feature(&features, vnet_hdr_offloads[k]);
        }
    }

    if (!(peer_has_vnet_hdr(n) && peer_has_ufo(n))) {
        virtio_clear_feature(&features, VIRTIO_NET_F_GUEST_UFO);
        virtio_clear_feature(&features, VIRTIO_NET_F_HOST_UFO);
    }

    if (get_vhost_net(nc->peer)) {
        return vhost_net_get_features(get_vhost_net(nc->peer), features);
    }

    return features;
}

G
Gerd Hoffmann 已提交
483
/*
 * Return the fixed feature set MAC, CSUM, HOST_TSO4, HOST_TSO6 and HOST_ECN.
 * 'vdev' is not used.
 */
static uint64_t virtio_net_bad_features(VirtIODevice *vdev)
{
    /* Linux kernel 2.6.25.  It understood MAC (as everyone must),
     * but also these: */
    static const unsigned int legacy_bits[] = {
        VIRTIO_NET_F_MAC,
        VIRTIO_NET_F_CSUM,
        VIRTIO_NET_F_HOST_TSO4,
        VIRTIO_NET_F_HOST_TSO6,
        VIRTIO_NET_F_HOST_ECN,
    };
    uint64_t result = 0;
    size_t k;

    for (k = 0; k < sizeof(legacy_bits) / sizeof(legacy_bits[0]); k++) {
        virtio_add_feature(&result, legacy_bits[k]);
    }

    return result;
}

498 499
/*
 * Push the currently selected guest offloads (n->curr_guest_offloads) to
 * the peer of the first queue as five on/off flags: CSUM, TSO4, TSO6, ECN
 * and UFO.
 */
static void virtio_net_apply_guest_offloads(VirtIONet *n)
{
    const uint64_t active = n->curr_guest_offloads;
    NetClientState *peer = qemu_get_queue(n->nic)->peer;

    qemu_set_offload(peer,
                     !!(active & (1ULL << VIRTIO_NET_F_GUEST_CSUM)),
                     !!(active & (1ULL << VIRTIO_NET_F_GUEST_TSO4)),
                     !!(active & (1ULL << VIRTIO_NET_F_GUEST_TSO6)),
                     !!(active & (1ULL << VIRTIO_NET_F_GUEST_ECN)),
                     !!(active & (1ULL << VIRTIO_NET_F_GUEST_UFO)));
}

/*
 * Extract the guest offload bits (GUEST_CSUM, GUEST_TSO4, GUEST_TSO6,
 * GUEST_ECN, GUEST_UFO) from a feature word.
 *
 * The parameter is 64 bits wide to match the 64-bit feature words callers
 * pass in; a 32-bit parameter would silently drop the upper half before
 * masking.
 */
static uint64_t virtio_net_guest_offloads_by_features(uint64_t features)
{
    static const uint64_t guest_offloads_mask =
        (1ULL << VIRTIO_NET_F_GUEST_CSUM) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO4) |
        (1ULL << VIRTIO_NET_F_GUEST_TSO6) |
        (1ULL << VIRTIO_NET_F_GUEST_ECN)  |
        (1ULL << VIRTIO_NET_F_GUEST_UFO);

    return guest_offloads_mask & features;
}

/* Guest offload bits present in the device's guest_features. */
static inline uint64_t virtio_net_supported_guest_offloads(VirtIONet *n)
{
    return virtio_net_guest_offloads_by_features(
        VIRTIO_DEVICE(n)->guest_features);
}

G
Gerd Hoffmann 已提交
526
/*
 * Apply the feature set 'features' to the device:
 * multiqueue mode, header layout (mergeable RX buffers / version 1), guest
 * offloads (only when the peer has a vnet header), vhost feature
 * acknowledgement for every queue with a vhost backend, and the initial
 * VLAN filter table (all clear with CTRL_VLAN, all set without it).
 */
static void virtio_net_set_features(VirtIODevice *vdev, uint64_t features)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int vlan_fill;
    int idx;

    virtio_net_set_multiqueue(n,
                              virtio_has_feature(features, VIRTIO_NET_F_MQ));

    virtio_net_set_mrg_rx_bufs(n,
                               virtio_has_feature(features,
                                                  VIRTIO_NET_F_MRG_RXBUF),
                               virtio_has_feature(features,
                                                  VIRTIO_F_VERSION_1));

    if (n->has_vnet_hdr) {
        n->curr_guest_offloads =
            virtio_net_guest_offloads_by_features(features);
        virtio_net_apply_guest_offloads(n);
    }

    for (idx = 0; idx < n->max_queues; idx++) {
        NetClientState *nc = qemu_get_subqueue(n->nic, idx);

        if (get_vhost_net(nc->peer)) {
            vhost_net_ack_features(get_vhost_net(nc->peer), features);
        }
    }

    vlan_fill = virtio_has_feature(features, VIRTIO_NET_F_CTRL_VLAN) ? 0
                                                                     : 0xff;
    memset(n->vlans, vlan_fill, MAX_VLAN >> 3);
}

562
/*
 * Handle a VIRTIO_NET_CTRL_RX command: read a one-byte on/off value from
 * the request and store it in the receive-mode flag selected by 'cmd'.
 *
 * Returns VIRTIO_NET_OK on success (after sending an rx-filter
 * notification), VIRTIO_NET_ERR on a short request or an unknown command.
 */
static int virtio_net_handle_rx_mode(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    NetClientState *nc = qemu_get_queue(n->nic);
    uint8_t on;

    if (iov_to_buf(iov, iov_cnt, 0, &on, sizeof(on)) != sizeof(on)) {
        return VIRTIO_NET_ERR;
    }

    switch (cmd) {
    case VIRTIO_NET_CTRL_RX_PROMISC:
        n->promisc = on;
        break;
    case VIRTIO_NET_CTRL_RX_ALLMULTI:
        n->allmulti = on;
        break;
    case VIRTIO_NET_CTRL_RX_ALLUNI:
        n->alluni = on;
        break;
    case VIRTIO_NET_CTRL_RX_NOMULTI:
        n->nomulti = on;
        break;
    case VIRTIO_NET_CTRL_RX_NOUNI:
        n->nouni = on;
        break;
    case VIRTIO_NET_CTRL_RX_NOBCAST:
        n->nobcast = on;
        break;
    default:
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

595 596 597 598 599 600 601
/*
 * Handle a VIRTIO_NET_CTRL_GUEST_OFFLOADS command.
 *
 * Requires the CTRL_GUEST_OFFLOADS feature, a full 64-bit offload mask in
 * the request, the SET command, a peer with vnet header, and a mask that
 * contains only supported offloads.  On success the mask becomes the
 * current guest offloads and is applied to the peer.
 *
 * Returns VIRTIO_NET_OK on success, VIRTIO_NET_ERR otherwise.
 */
static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
                                     struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint64_t offloads;

    if (!virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        return VIRTIO_NET_ERR;
    }

    if (iov_to_buf(iov, iov_cnt, 0, &offloads, sizeof(offloads)) !=
        sizeof(offloads)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd != VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
        return VIRTIO_NET_ERR;
    }

    if (!n->has_vnet_hdr) {
        return VIRTIO_NET_ERR;
    }

    if (offloads & ~virtio_net_supported_guest_offloads(n)) {
        return VIRTIO_NET_ERR;
    }

    n->curr_guest_offloads = offloads;
    virtio_net_apply_guest_offloads(n);

    return VIRTIO_NET_OK;
}

632
/*
 * Handle a VIRTIO_NET_CTRL_MAC command.
 *
 * VIRTIO_NET_CTRL_MAC_ADDR_SET: the request must be exactly one MAC address,
 * which replaces n->mac.
 *
 * VIRTIO_NET_CTRL_MAC_TABLE_SET: the request holds two tables back to back
 * (unicast, then multicast), each a 32-bit entry count followed by that many
 * MAC addresses.  A table that does not fit in the remaining space of the
 * MAC_TABLE_ENTRIES-slot filter is not stored and sets the matching
 * overflow flag instead.  The device state is only updated once the whole
 * request has been parsed successfully.
 *
 * Returns VIRTIO_NET_OK on success (after sending an rx-filter
 * notification), VIRTIO_NET_ERR on an unknown command or malformed request.
 */
static int virtio_net_handle_mac(VirtIONet *n, uint8_t cmd,
                                 struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    NetClientState *nc = qemu_get_queue(n->nic);
    struct virtio_net_ctrl_mac mac_data;
    /* 64-bit so that a guest-chosen entry count cannot wrap the product. */
    uint64_t table_bytes;
    size_t s;
    int in_use = 0;
    int first_multi = 0;
    uint8_t uni_overflow = 0;
    uint8_t multi_overflow = 0;
    uint8_t *macs;

    if (cmd == VIRTIO_NET_CTRL_MAC_ADDR_SET) {
        if (iov_size(iov, iov_cnt) != sizeof(n->mac)) {
            return VIRTIO_NET_ERR;
        }
        s = iov_to_buf(iov, iov_cnt, 0, &n->mac, sizeof(n->mac));
        assert(s == sizeof(n->mac));
        qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
        rxfilter_notify(nc);

        return VIRTIO_NET_OK;
    }

    if (cmd != VIRTIO_NET_CTRL_MAC_TABLE_SET) {
        return VIRTIO_NET_ERR;
    }

    /* Staging copy; committed to n->mac_table only on success. */
    macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    /* --- unicast table --- */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    /* Only convert once we know the field was fully read. */
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    iov_discard_front(&iov, &iov_cnt, s);

    table_bytes = (uint64_t)mac_data.entries * ETH_ALEN;
    if (table_bytes > iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES) {
        s = iov_to_buf(iov, iov_cnt, 0, macs, (size_t)table_bytes);
        if (s != table_bytes) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        uni_overflow = 1;
    }

    /* table_bytes <= iov_size() here, so it fits in size_t. */
    iov_discard_front(&iov, &iov_cnt, (size_t)table_bytes);

    first_multi = in_use;

    /* --- multicast table --- */
    s = iov_to_buf(iov, iov_cnt, 0, &mac_data.entries,
                   sizeof(mac_data.entries));
    if (s != sizeof(mac_data.entries)) {
        goto error;
    }
    mac_data.entries = virtio_ldl_p(vdev, &mac_data.entries);
    iov_discard_front(&iov, &iov_cnt, s);

    /* The multicast table must account for all remaining bytes. */
    table_bytes = (uint64_t)mac_data.entries * ETH_ALEN;
    if (table_bytes != iov_size(iov, iov_cnt)) {
        goto error;
    }

    if (mac_data.entries <= MAC_TABLE_ENTRIES - in_use) {
        s = iov_to_buf(iov, iov_cnt, 0, &macs[in_use * ETH_ALEN],
                       (size_t)table_bytes);
        if (s != table_bytes) {
            goto error;
        }
        in_use += mac_data.entries;
    } else {
        multi_overflow = 1;
    }

    n->mac_table.in_use = in_use;
    n->mac_table.first_multi = first_multi;
    n->mac_table.uni_overflow = uni_overflow;
    n->mac_table.multi_overflow = multi_overflow;
    memcpy(n->mac_table.macs, macs, MAC_TABLE_ENTRIES * ETH_ALEN);
    g_free(macs);
    rxfilter_notify(nc);

    return VIRTIO_NET_OK;

error:
    g_free(macs);
    return VIRTIO_NET_ERR;
}

728
/*
 * Handle a VIRTIO_NET_CTRL_VLAN command: read a 16-bit VLAN id from the
 * request and set (ADD) or clear (DEL) its bit in the n->vlans bitmap.
 *
 * Returns VIRTIO_NET_OK on success (after sending an rx-filter
 * notification), VIRTIO_NET_ERR on a short request, an id >= MAX_VLAN or an
 * unknown command.
 */
static int virtio_net_handle_vlan_table(VirtIONet *n, uint8_t cmd,
                                        struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    uint16_t vid;
    size_t s;
    NetClientState *nc = qemu_get_queue(n->nic);

    s = iov_to_buf(iov, iov_cnt, 0, &vid, sizeof(vid));
    if (s != sizeof(vid)) {
        return VIRTIO_NET_ERR;
    }
    /* Convert only after the short-read check, so that an indeterminate
     * value is never read. */
    vid = virtio_lduw_p(vdev, &vid);

    if (vid >= MAX_VLAN) {
        return VIRTIO_NET_ERR;
    }

    if (cmd == VIRTIO_NET_CTRL_VLAN_ADD) {
        n->vlans[vid >> 5] |= (1U << (vid & 0x1f));
    } else if (cmd == VIRTIO_NET_CTRL_VLAN_DEL) {
        n->vlans[vid >> 5] &= ~(1U << (vid & 0x1f));
    } else {
        return VIRTIO_NET_ERR;
    }

    rxfilter_notify(nc);

    return VIRTIO_NET_OK;
}

J
Jason Wang 已提交
757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773
/*
 * Handle a VIRTIO_NET_CTRL_ANNOUNCE command.
 *
 * Only VIRTIO_NET_CTRL_ANNOUNCE_ACK is accepted, and only while
 * VIRTIO_NET_S_ANNOUNCE is set in the device status.  The status bit is
 * cleared and, if announce rounds remain, the announce timer is re-armed.
 * 'iov' and 'iov_cnt' are not used.
 *
 * Returns VIRTIO_NET_OK when the ack was accepted, VIRTIO_NET_ERR otherwise.
 */
static int virtio_net_handle_announce(VirtIONet *n, uint8_t cmd,
                                      struct iovec *iov, unsigned int iov_cnt)
{
    if (cmd != VIRTIO_NET_CTRL_ANNOUNCE_ACK ||
        !(n->status & VIRTIO_NET_S_ANNOUNCE)) {
        return VIRTIO_NET_ERR;
    }

    n->status &= ~VIRTIO_NET_S_ANNOUNCE;

    if (n->announce_counter) {
        timer_mod(n->announce_timer,
                  qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) +
                  self_announce_delay(n->announce_counter));
    }

    return VIRTIO_NET_OK;
}

J
Jason Wang 已提交
774
/*
 * Handle a VIRTIO_NET_CTRL_MQ command: set the number of active queue pairs.
 *
 * The request must hold a complete virtio_net_ctrl_mq, the command must be
 * VQ_PAIRS_SET, multiqueue must be enabled, and the requested count must
 * lie within the protocol limits and not exceed n->max_queues.
 *
 * Returns VIRTIO_NET_OK on success, VIRTIO_NET_ERR otherwise.
 */
static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
                                struct iovec *iov, unsigned int iov_cnt)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct virtio_net_ctrl_mq mq;
    uint16_t pairs;

    if (iov_to_buf(iov, iov_cnt, 0, &mq, sizeof(mq)) != sizeof(mq)) {
        return VIRTIO_NET_ERR;
    }

    if (cmd != VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET) {
        return VIRTIO_NET_ERR;
    }

    pairs = virtio_lduw_p(vdev, &mq.virtqueue_pairs);

    if (pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
        pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX ||
        pairs > n->max_queues ||
        !n->multiqueue) {
        return VIRTIO_NET_ERR;
    }

    n->curr_queues = pairs;
    /* stop the backend before changing the number of queues to avoid handling a
     * disabled queue */
    virtio_net_set_status(vdev, vdev->status);
    virtio_net_set_queues(n);

    return VIRTIO_NET_OK;
}
808 809
/*
 * Control virtqueue handler.
 *
 * For every element popped from 'vq': read the control header from the
 * out buffers, dispatch the remaining payload to the handler for the
 * header's class, write the one-byte ack status into the in buffers, push
 * the element back and notify the guest.
 *
 * An element whose buffers cannot hold the header or the status is treated
 * as fatal (error report and exit).
 */
static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    struct virtio_net_ctrl_hdr ctrl;
    virtio_net_ctrl_ack status;
    VirtQueueElement elem;
    size_t s;
    struct iovec *iov, *iov2;
    unsigned int iov_cnt;

    while (virtqueue_pop(vq, &elem)) {
        /*
         * Start every request from ERR.  Without this, a request with an
         * unknown class (no branch below assigns 'status') would be
         * acknowledged with whatever the previous request returned.
         */
        status = VIRTIO_NET_ERR;

        if (iov_size(elem.in_sg, elem.in_num) < sizeof(status) ||
            iov_size(elem.out_sg, elem.out_num) < sizeof(ctrl)) {
            error_report("virtio-net ctrl missing headers");
            exit(1);
        }

        /*
         * Work on a copy of the out vector: iov_discard_front() modifies
         * it.  iov2 keeps the original pointer for g_free().
         */
        iov_cnt = elem.out_num;
        iov2 = iov = g_memdup(elem.out_sg, sizeof(struct iovec) * elem.out_num);
        s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
        iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
        if (s != sizeof(ctrl)) {
            status = VIRTIO_NET_ERR;
        } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
            status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
            status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
            status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
            status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
            status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
        } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
            status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
        }

        s = iov_from_buf(elem.in_sg, elem.in_num, 0, &status, sizeof(status));
        assert(s == sizeof(status));

        virtqueue_push(vq, &elem, sizeof(status));
        virtio_notify(vdev, vq);
        g_free(iov2);
    }
}

A
aliguori 已提交
854 855 856 857
/* RX */

/*
 * RX virtqueue handler: flush the packets queued for the net subqueue that
 * corresponds to 'vq'.
 */
static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int subqueue = vq2q(virtio_get_queue_index(vq));
    NetClientState *nc = qemu_get_subqueue(n->nic, subqueue);

    qemu_flush_queued_packets(nc);
}

864
/*
 * Return 1 if net client 'nc' may currently deliver packets to the device,
 * 0 otherwise.  Delivery requires a running VM, a queue index below the
 * number of active queues, a ready RX virtqueue and DRIVER_OK in the device
 * status.
 */
static int virtio_net_can_receive(NetClientState *nc)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);

    if (!vdev->vm_running || nc->queue_index >= n->curr_queues) {
        return 0;
    }

    if (virtio_queue_ready(q->rx_vq) &&
        (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return 1;
    }

    return 0;
}

886
/*
 * True when the RX virtqueue of 'q' cannot take a packet right now: it is
 * empty, or mergeable RX buffers are in use and fewer than 'bufsize' bytes
 * are available.
 */
static bool virtio_net_rx_vq_short(VirtIONetQueue *q, int bufsize)
{
    if (virtio_queue_empty(q->rx_vq)) {
        return true;
    }

    return q->n->mergeable_rx_bufs &&
           !virtqueue_avail_bytes(q->rx_vq, bufsize, 0);
}

/*
 * Return 1 if the RX virtqueue of 'q' has room for 'bufsize' bytes, 0 if
 * not.  Guest notifications are left enabled when 0 is returned and
 * disabled when 1 is returned.
 */
static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize)
{
    if (virtio_net_rx_vq_short(q, bufsize)) {
        virtio_queue_set_notification(q->rx_vq, 1);

        /* To avoid a race condition where the guest has made some buffers
         * available after the above check but before notification was
         * enabled, check for available buffers again.
         */
        if (virtio_net_rx_vq_short(q, bufsize)) {
            return 0;
        }
    }

    virtio_queue_set_notification(q->rx_vq, 0);
    return 1;
}

909
/*
 * Apply virtio_tswap16s() in place to each 16-bit field of 'hdr':
 * csum_start, csum_offset, gso_size and hdr_len.
 */
static void virtio_net_hdr_swap(VirtIODevice *vdev, struct virtio_net_hdr *hdr)
{
    /* Checksum fields. */
    virtio_tswap16s(vdev, &hdr->csum_start);
    virtio_tswap16s(vdev, &hdr->csum_offset);

    /* Segmentation / length fields. */
    virtio_tswap16s(vdev, &hdr->gso_size);
    virtio_tswap16s(vdev, &hdr->hdr_len);
}

A
Anthony Liguori 已提交
917 918 919 920 921 922 923 924 925 926 927 928 929 930 931
/* dhclient uses AF_PACKET but doesn't pass auxdata to the kernel so
 * it never finds out that the packets don't have valid checksums.  This
 * causes dhclient to get upset.  Fedora's carried a patch for ages to
 * fix this with Xen but it hasn't appeared in an upstream release of
 * dhclient yet.
 *
 * To avoid breaking existing guests, we catch udp packets and add
 * checksums.  This is terrible but it's better than hacking the guest
 * kernels.
 *
 * N.B. if we introduce a zero-copy API, this operation is no longer free so
 * we should provide a mechanism to disable it to avoid polluting the host
 * cache.
 */
static void work_around_broken_dhclient(struct virtio_net_hdr *hdr,
M
Michael S. Tsirkin 已提交
932
                                        uint8_t *buf, size_t size)
A
Anthony Liguori 已提交
933 934 935 936 937 938
{
    if ((hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && /* missing csum */
        (size > 27 && size < 1500) && /* normal sized MTU */
        (buf[12] == 0x08 && buf[13] == 0x00) && /* ethertype == IPv4 */
        (buf[23] == 17) && /* ip.protocol == UDP */
        (buf[34] == 0 && buf[35] == 67)) { /* udp.srcport == bootps */
M
Michael S. Tsirkin 已提交
939
        net_checksum_calculate(buf, size);
A
Anthony Liguori 已提交
940 941 942 943
        hdr->flags &= ~VIRTIO_NET_HDR_F_NEEDS_CSUM;
    }
}

944 945
/*
 * Write the struct virtio_net_hdr for an incoming packet to the start of the
 * guest buffers described by @iov / @iov_cnt.
 *
 * When the device has a vnet header (n->has_vnet_hdr), the header found at
 * the start of @buf is fixed up and copied out.  NOTE: in that case @buf is
 * modified in place despite being const-qualified: the dhclient checksum
 * workaround and the header field swap both write through it.
 *
 * Otherwise a header with no flags and GSO_NONE is written.
 */
static void receive_header(VirtIONet *n, const struct iovec *iov, int iov_cnt,
                           const void *buf, size_t size)
{
    if (n->has_vnet_hdr) {
        /* FIXME this cast is evil */
        void *wbuf = (void *)buf;
        /* The packet payload starts host_hdr_len bytes into the buffer. */
        work_around_broken_dhclient(wbuf, wbuf + n->host_hdr_len,
                                    size - n->host_hdr_len);
        /* Swap in place first so that the copy below sees swapped fields. */
        virtio_net_hdr_swap(VIRTIO_DEVICE(n), wbuf);
        iov_from_buf(iov, iov_cnt, 0, buf, sizeof(struct virtio_net_hdr));
    } else {
        struct virtio_net_hdr hdr = {
            .flags = 0,
            .gso_type = VIRTIO_NET_HDR_GSO_NONE
        };
        iov_from_buf(iov, iov_cnt, 0, &hdr, sizeof hdr);
    }
}

963 964 965
/*
 * Decide whether an incoming frame passes the device's RX filters.
 *
 * @buf points at the host header (n->host_hdr_len bytes) followed by the
 * Ethernet frame.  @size is currently unused: no length validation is done
 * here.
 *
 * Returns 1 if the frame should be delivered to the guest, 0 if it should
 * be dropped.  Promiscuous mode accepts everything; otherwise the frame is
 * checked against the VLAN bitmap (for 0x8100-tagged frames) and then
 * against the broadcast / multicast / unicast settings and the MAC table.
 */
static int receive_filter(VirtIONet *n, const uint8_t *buf, int size)
{
    static const uint8_t bcast[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
    static const uint8_t vlan[] = {0x81, 0x00};
    const uint8_t *ptr = buf;
    int i;

    if (n->promisc) {
        return 1;
    }

    ptr += n->host_hdr_len;

    if (!memcmp(&ptr[12], vlan, sizeof(vlan))) {
        /*
         * Read the big-endian TCI bytewise: ptr + 14 is not guaranteed to be
         * 16-bit aligned, so it must not be dereferenced as a uint16_t.
         */
        int vid = lduw_be_p(ptr + 14) & 0xfff;
        /* n->vlans is a bitmap indexed by VLAN id, 32 ids per word. */
        if (!(n->vlans[vid >> 5] & (1U << (vid & 0x1f)))) {
            return 0;
        }
    }

    if (ptr[0] & 1) { /* multicast (group bit set in destination MAC) */
        if (!memcmp(ptr, bcast, sizeof(bcast))) {
            return !n->nobcast;
        } else if (n->nomulti) {
            return 0;
        } else if (n->allmulti || n->mac_table.multi_overflow) {
            return 1;
        }

        /* Entries from first_multi up to in_use are searched here. */
        for (i = n->mac_table.first_multi; i < n->mac_table.in_use; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    } else { /* unicast */
        if (n->nouni) {
            return 0;
        } else if (n->alluni || n->mac_table.uni_overflow) {
            return 1;
        } else if (!memcmp(ptr, n->mac, ETH_ALEN)) {
            return 1;
        }

        /* Entries below first_multi are searched here. */
        for (i = 0; i < n->mac_table.first_multi; i++) {
            if (!memcmp(ptr, &n->mac_table.macs[i * ETH_ALEN], ETH_ALEN)) {
                return 1;
            }
        }
    }

    return 0;
}

1014
/*
 * Deliver one incoming packet (@buf, @size bytes including the host header)
 * to the guest through the RX virtqueue that belongs to @nc.
 *
 * Returns:
 *   -1    if the device cannot receive right now, or no element could be
 *         popped for the first buffer;
 *    0    if virtio_net_has_buffers() reports insufficient guest buffers;
 *   @size if the packet was delivered, rejected by the RX filter, or
 *         dropped because it did not fit a single non-mergeable buffer.
 *
 * The process exits if the queue runs empty part-way through a packet or if
 * a popped element has no "in" buffers.
 */
static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, size_t size)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
    struct virtio_net_hdr_mrg_rxbuf mhdr;
    unsigned mhdr_cnt = 0;
    size_t offset, i, guest_offset;

    if (!virtio_net_can_receive(nc)) {
        return -1;
    }

    /* hdr_len refers to the header we supply to the guest */
    if (!virtio_net_has_buffers(q, size + n->guest_hdr_len - n->host_hdr_len)) {
        return 0;
    }

    if (!receive_filter(n, buf, size)) {
        return size;
    }

    /* offset: bytes of buf consumed so far; i: number of elements used. */
    offset = i = 0;

    while (offset < size) {
        VirtQueueElement elem;
        int len, total;
        const struct iovec *sg = elem.in_sg;

        total = 0;

        if (virtqueue_pop(q->rx_vq, &elem) == 0) {
            if (i == 0) {
                return -1;
            }
            error_report("virtio-net unexpected empty queue: "
                         "i %zu mergeable %d offset %zu, size %zu, "
                         "guest hdr len %zu, host hdr len %zu "
                         "guest features 0x%" PRIx64,
                         i, n->mergeable_rx_bufs, offset, size,
                         n->guest_hdr_len, n->host_hdr_len,
                         vdev->guest_features);
            exit(1);
        }

        if (elem.in_num < 1) {
            error_report("virtio-net receive queue contains no in buffers");
            exit(1);
        }

        if (i == 0) {
            assert(offset == 0);
            if (n->mergeable_rx_bufs) {
                /*
                 * Remember where num_buffers lives in the guest's first
                 * buffer; it is filled in once the element count is known.
                 */
                mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
                                    sg, elem.in_num,
                                    offsetof(typeof(mhdr), num_buffers),
                                    sizeof(mhdr.num_buffers));
            }

            receive_header(n, sg, elem.in_num, buf, size);
            /* Skip the host header in buf and the guest header in sg. */
            offset = n->host_hdr_len;
            total += n->guest_hdr_len;
            guest_offset = n->guest_hdr_len;
        } else {
            guest_offset = 0;
        }

        /* copy in packet.  ugh */
        len = iov_from_buf(sg, elem.in_num, guest_offset,
                           buf + offset, size - offset);
        total += len;
        offset += len;
        /* If buffers can't be merged, at this point we
         * must have consumed the complete packet.
         * Otherwise, drop it. */
        if (!n->mergeable_rx_bufs && offset < size) {
#if 0
            error_report("virtio-net truncated non-mergeable packet: "
                         "i %zu mergeable %d offset %zu, size %zu, "
                         "guest hdr len %zu, host hdr len %zu",
                         i, n->mergeable_rx_bufs,
                         offset, size, n->guest_hdr_len, n->host_hdr_len);
#endif
            return size;
        }

        /* signal other side */
        virtqueue_fill(q->rx_vq, &elem, total, i++);
    }

    if (mhdr_cnt) {
        /* Patch the final element count into the header's num_buffers. */
        virtio_stw_p(vdev, &mhdr.num_buffers, i);
        iov_from_buf(mhdr_sg, mhdr_cnt,
                     0,
                     &mhdr.num_buffers, sizeof mhdr.num_buffers);
    }

    virtqueue_flush(q->rx_vq, i);
    virtio_notify(vdev, q->rx_vq);

    return size;
}

1116
static int32_t virtio_net_flush_tx(VirtIONetQueue *q);
1117

1118
/*
 * Completion callback passed to qemu_sendv_packet_async() by
 * virtio_net_flush_tx().
 *
 * Pushes the element saved in q->async_tx back to the guest, clears the
 * saved element's out_num (which virtio_net_flush_tx() uses as its "send
 * still pending" marker), re-enables TX notification and flushes the queue
 * again.  @len is unused.
 */
static void virtio_net_tx_complete(NetClientState *nc, ssize_t len)
{
    VirtIONet *n = qemu_get_nic_opaque(nc);
    VirtIONetQueue *q = virtio_net_get_subqueue(nc);
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    virtqueue_push(q->tx_vq, &q->async_tx.elem, 0);
    virtio_notify(vdev, q->tx_vq);

    q->async_tx.elem.out_num = 0;

    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
}

A
aliguori 已提交
1133
/* TX */
1134
/*
 * Pop elements from the TX virtqueue of @q and hand them to the peer.
 *
 * Returns the number of packets processed in this call (at most
 * n->tx_burst), or -EBUSY when qemu_sendv_packet_async() returned 0; in
 * that case the element is stashed in q->async_tx, TX notification is
 * disabled and virtio_net_tx_complete() finishes the job later.
 *
 * Returns 0 without touching the queue if the driver is not DRIVER_OK or
 * if a previously stashed element is still pending.
 */
static int32_t virtio_net_flush_tx(VirtIONetQueue *q)
{
    VirtIONet *n = q->n;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtQueueElement elem;
    int32_t num_packets = 0;
    int queue_index = vq2q(virtio_get_queue_index(q->tx_vq));
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return num_packets;
    }

    /* A non-zero out_num marks an element still awaiting completion. */
    if (q->async_tx.elem.out_num) {
        virtio_queue_set_notification(q->tx_vq, 0);
        return num_packets;
    }

    while (virtqueue_pop(q->tx_vq, &elem)) {
        ssize_t ret;
        unsigned int out_num = elem.out_num;
        struct iovec *out_sg = &elem.out_sg[0];
        struct iovec sg[VIRTQUEUE_MAX_SIZE], sg2[VIRTQUEUE_MAX_SIZE + 1];
        struct virtio_net_hdr_mrg_rxbuf mhdr;

        if (out_num < 1) {
            error_report("virtio-net header not in first element");
            exit(1);
        }

        if (n->has_vnet_hdr) {
            if (iov_to_buf(out_sg, out_num, 0, &mhdr, n->guest_hdr_len) <
                n->guest_hdr_len) {
                error_report("virtio-net header incorrect");
                exit(1);
            }
            if (virtio_needs_swap(vdev)) {
                /*
                 * Send a swapped private copy of the header (sg2[0])
                 * followed by the rest of the guest's data (sg2[1..]).
                 */
                virtio_net_hdr_swap(vdev, (void *) &mhdr);
                sg2[0].iov_base = &mhdr;
                sg2[0].iov_len = n->guest_hdr_len;
                out_num = iov_copy(&sg2[1], ARRAY_SIZE(sg2) - 1,
                                   out_sg, out_num,
                                   n->guest_hdr_len, -1);
                if (out_num == VIRTQUEUE_MAX_SIZE) {
                    goto drop;
                }
                out_num += 1;
                out_sg = sg2;
            }
        }
        /*
         * If host wants to see the guest header as is, we can
         * pass it on unchanged. Otherwise, copy just the parts
         * that host is interested in.
         */
        assert(n->host_hdr_len <= n->guest_hdr_len);
        if (n->host_hdr_len != n->guest_hdr_len) {
            unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
                                       out_sg, out_num,
                                       0, n->host_hdr_len);
            sg_num += iov_copy(sg + sg_num, ARRAY_SIZE(sg) - sg_num,
                             out_sg, out_num,
                             n->guest_hdr_len, -1);
            out_num = sg_num;
            out_sg = sg;
        }

        ret = qemu_sendv_packet_async(qemu_get_subqueue(n->nic, queue_index),
                                      out_sg, out_num, virtio_net_tx_complete);
        if (ret == 0) {
            /* Keep the element until virtio_net_tx_complete() runs. */
            virtio_queue_set_notification(q->tx_vq, 0);
            q->async_tx.elem = elem;
            return -EBUSY;
        }

drop:
        virtqueue_push(q->tx_vq, &elem, 0);
        virtio_notify(vdev, q->tx_vq);

        if (++num_packets >= n->tx_burst) {
            break;
        }
    }
    return num_packets;
}

1218
/*
 * TX virtqueue handler installed by virtio_net_add_queue() when the "timer"
 * TX mode is selected.
 *
 * If a flush is already pending (tx_waiting), cancel the timer and flush
 * immediately; otherwise arm the timer n->tx_timeout ns from now and
 * disable further TX notifications until it fires.
 */
static void virtio_net_handle_tx_timer(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        q->tx_waiting = 1;
        return;
    }

    if (q->tx_waiting) {
        virtio_queue_set_notification(vq, 1);
        timer_del(q->tx_timer);
        q->tx_waiting = 0;
        virtio_net_flush_tx(q);
    } else {
        timer_mod(q->tx_timer,
                       qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + n->tx_timeout);
        q->tx_waiting = 1;
        virtio_queue_set_notification(vq, 0);
    }
}

1242 1243
/*
 * TX virtqueue handler installed by virtio_net_add_queue() when the bottom
 * half TX mode is used.
 *
 * Marks the queue as waiting and, if the VM is running, disables TX
 * notification and schedules the queue's bottom half.  Does nothing if a
 * flush is already pending.
 */
static void virtio_net_handle_tx_bh(VirtIODevice *vdev, VirtQueue *vq)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    VirtIONetQueue *q = &n->vqs[vq2q(virtio_get_queue_index(vq))];

    if (unlikely(q->tx_waiting)) {
        return;
    }
    q->tx_waiting = 1;
    /* This happens when device was stopped but VCPU wasn't. */
    if (!vdev->vm_running) {
        return;
    }
    virtio_queue_set_notification(vq, 0);
    qemu_bh_schedule(q->tx_bh);
}

A
aliguori 已提交
1259 1260
static void virtio_net_tx_timer(void *opaque)
{
1261 1262
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
1263
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1264 1265 1266 1267 1268 1269
    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }
A
aliguori 已提交
1270

1271
    q->tx_waiting = 0;
A
aliguori 已提交
1272 1273

    /* Just in case the driver is not ready on more */
1274
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
A
aliguori 已提交
1275
        return;
1276
    }
A
aliguori 已提交
1277

1278 1279
    virtio_queue_set_notification(q->tx_vq, 1);
    virtio_net_flush_tx(q);
A
aliguori 已提交
1280 1281
}

1282 1283
static void virtio_net_tx_bh(void *opaque)
{
1284 1285
    VirtIONetQueue *q = opaque;
    VirtIONet *n = q->n;
1286
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
1287 1288
    int32_t ret;

1289 1290 1291 1292 1293 1294
    /* This happens when device was stopped but BH wasn't. */
    if (!vdev->vm_running) {
        /* Make sure tx waiting is set, so we'll run when restarted. */
        assert(q->tx_waiting);
        return;
    }
1295

1296
    q->tx_waiting = 0;
1297 1298

    /* Just in case the driver is not ready on more */
1299
    if (unlikely(!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK))) {
1300
        return;
1301
    }
1302

1303
    ret = virtio_net_flush_tx(q);
1304 1305 1306 1307 1308 1309 1310
    if (ret == -EBUSY) {
        return; /* Notification re-enable handled by tx_complete */
    }

    /* If we flush a full burst of packets, assume there are
     * more coming and immediately reschedule */
    if (ret >= n->tx_burst) {
1311 1312
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
1313 1314 1315 1316 1317 1318
        return;
    }

    /* If less than a full burst, re-enable notification and flush
     * anything that may have come in while we weren't looking.  If
     * we find something, assume the guest is still active and reschedule */
1319 1320 1321 1322 1323
    virtio_queue_set_notification(q->tx_vq, 1);
    if (virtio_net_flush_tx(q) > 0) {
        virtio_queue_set_notification(q->tx_vq, 0);
        qemu_bh_schedule(q->tx_bh);
        q->tx_waiting = 1;
1324 1325 1326
    }
}

1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400
/*
 * Create the RX/TX virtqueue pair for queue pair @index of @n.
 *
 * The RX queue is added first, then the TX queue.  The TX side uses a timer
 * when the "tx" option is exactly "timer"; any other value, or no value at
 * all, selects a bottom half.
 */
static void virtio_net_add_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = &n->vqs[index];
    bool use_timer = n->net_conf.tx && !strcmp(n->net_conf.tx, "timer");

    q->rx_vq = virtio_add_queue(vdev, 256, virtio_net_handle_rx);

    if (use_timer) {
        q->tx_vq = virtio_add_queue(vdev, 256, virtio_net_handle_tx_timer);
        q->tx_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, virtio_net_tx_timer, q);
    } else {
        q->tx_vq = virtio_add_queue(vdev, 256, virtio_net_handle_tx_bh);
        q->tx_bh = qemu_bh_new(virtio_net_tx_bh, q);
    }

    q->tx_waiting = 0;
    q->n = n;
}

/*
 * Tear down the RX/TX virtqueue pair for queue pair @index of @n.
 *
 * Packets still queued for the corresponding subqueue are purged first.
 * The TX timer or bottom half is released and its pointer cleared, and
 * tx_waiting is reset, so that no stale reference or pending-flush flag
 * survives in the VirtIONetQueue slot after the queue is gone.
 */
static void virtio_net_del_queue(VirtIONet *n, int index)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    VirtIONetQueue *q = &n->vqs[index];
    NetClientState *nc = qemu_get_subqueue(n->nic, index);

    qemu_purge_queued_packets(nc);

    /* Virtqueue 2 * index is the RX queue, 2 * index + 1 the TX queue. */
    virtio_del_queue(vdev, index * 2);
    if (q->tx_timer) {
        timer_del(q->tx_timer);
        timer_free(q->tx_timer);
        q->tx_timer = NULL;
    } else {
        qemu_bh_delete(q->tx_bh);
        q->tx_bh = NULL;
    }
    q->tx_waiting = 0;
    virtio_del_queue(vdev, index * 2 + 1);
}

/*
 * Resize the device to @new_max_queues RX/TX queue pairs plus the control
 * queue, which always occupies the last virtqueue index.
 */
static void virtio_net_change_num_queues(VirtIONet *n, int new_max_queues)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int old_num_queues = virtio_get_num_queues(vdev);
    int new_num_queues = new_max_queues * 2 + 1;
    int old_pairs;
    int pair;

    /* There is always at least one pair plus the control queue. */
    assert(old_num_queues >= 3);
    assert(old_num_queues % 2 == 1);

    if (new_num_queues == old_num_queues) {
        return;
    }

    /*
     * We always need to remove and add ctrl vq if
     * old_num_queues != new_num_queues. Remove ctrl_vq first,
     * and then we only enter one of the following two loops.
     */
    virtio_del_queue(vdev, old_num_queues - 1);

    old_pairs = (old_num_queues - 1) / 2;

    /* Shrinking: drop the pairs at index new_max_queues and above. */
    for (pair = new_max_queues; pair < old_pairs; pair++) {
        virtio_net_del_queue(n, pair);
    }

    /* Growing: create the pairs from old_pairs up to new_max_queues - 1. */
    for (pair = old_pairs; pair < new_max_queues; pair++) {
        virtio_net_add_queue(n, pair);
    }

    /* add ctrl_vq last */
    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
}

J
Jason Wang 已提交
1401
/*
 * Switch multiqueue mode on or off.
 *
 * With @multiqueue set the device exposes n->max_queues queue pairs,
 * otherwise a single pair.  The virtqueues are resized accordingly and
 * virtio_net_set_queues() is then called.
 */
static void virtio_net_set_multiqueue(VirtIONet *n, int multiqueue)
{
    int max = multiqueue ? n->max_queues : 1;

    n->multiqueue = multiqueue;
    virtio_net_change_num_queues(n, max);

    virtio_net_set_queues(n);
}

A
aliguori 已提交
1411 1412 1413
/*
 * savevm "save" handler registered in virtio_net_device_realize(); @opaque
 * is the VirtIONet.  Delegates to virtio_save().
 */
static void virtio_net_save(QEMUFile *f, void *opaque)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);

    /* At this point, backend must be stopped, otherwise
     * it might keep writing to memory. */
    assert(!n->vhost_started);
    virtio_save(vdev, f);
}

/*
 * Write the virtio-net specific device state to @f.
 *
 * The field order and widths here must stay in step with
 * virtio_net_load_device(), which reads them back.
 */
static void virtio_net_save_device(VirtIODevice *vdev, QEMUFile *f)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i;

    qemu_put_buffer(f, n->mac, ETH_ALEN);
    qemu_put_be32(f, n->vqs[0].tx_waiting);
    qemu_put_be32(f, n->mergeable_rx_bufs);
    qemu_put_be16(f, n->status);
    qemu_put_byte(f, n->promisc);
    qemu_put_byte(f, n->allmulti);
    qemu_put_be32(f, n->mac_table.in_use);
    qemu_put_buffer(f, n->mac_table.macs, n->mac_table.in_use * ETH_ALEN);
    /* The VLAN filter is a bitmap with one bit per VLAN id. */
    qemu_put_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);
    qemu_put_be32(f, n->has_vnet_hdr);
    qemu_put_byte(f, n->mac_table.multi_overflow);
    qemu_put_byte(f, n->mac_table.uni_overflow);
    qemu_put_byte(f, n->alluni);
    qemu_put_byte(f, n->nomulti);
    qemu_put_byte(f, n->nouni);
    qemu_put_byte(f, n->nobcast);
    qemu_put_byte(f, n->has_ufo);
    /* Multiqueue state is only present when more than one pair exists. */
    if (n->max_queues > 1) {
        qemu_put_be16(f, n->max_queues);
        qemu_put_be16(f, n->curr_queues);
        /* Queue 0's tx_waiting was already written above. */
        for (i = 1; i < n->curr_queues; i++) {
            qemu_put_be32(f, n->vqs[i].tx_waiting);
        }
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        qemu_put_be64(f, n->curr_guest_offloads);
    }
}

/*
 * savevm "load" handler registered in virtio_net_device_realize(); @opaque
 * is the VirtIONet.
 *
 * Accepts @version_id from 2 up to VIRTIO_NET_VM_VERSION and returns
 * -EINVAL outside that range.  After virtio_load() succeeds, the guest
 * offload state is restored (or reset to the supported set when the
 * CTRL_GUEST_OFFLOADS feature is absent) and, if the guest negotiated
 * GUEST_ANNOUNCE and CTRL_VQ, the announce timer is started.
 *
 * Returns 0 on success or the error returned by virtio_load().
 */
static int virtio_net_load(QEMUFile *f, void *opaque, int version_id)
{
    VirtIONet *n = opaque;
    VirtIODevice *vdev = VIRTIO_DEVICE(n);
    int ret;

    if (version_id < 2 || version_id > VIRTIO_NET_VM_VERSION) {
        return -EINVAL;
    }

    ret = virtio_load(vdev, f, version_id);
    if (ret) {
        return ret;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) {
        n->curr_guest_offloads = qemu_get_be64(f);
    } else {
        n->curr_guest_offloads = virtio_net_supported_guest_offloads(n);
    }

    if (peer_has_vnet_hdr(n)) {
        virtio_net_apply_guest_offloads(n);
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_NET_F_GUEST_ANNOUNCE) &&
        virtio_vdev_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) {
        n->announce_counter = SELF_ANNOUNCE_ROUNDS;
        timer_mod(n->announce_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL));
    }

    return 0;
}

/*
 * Read the virtio-net specific device state from @f, honouring the layout
 * differences between stream versions (@version_id).
 *
 * Returns 0 on success, -1 if the saved image requires a peer capability
 * that is missing (vnet_hdr, UFO) or carries inconsistent queue counts.
 */
static int virtio_net_load_device(VirtIODevice *vdev, QEMUFile *f,
                                  int version_id)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    int i, link_down;

    qemu_get_buffer(f, n->mac, ETH_ALEN);
    n->vqs[0].tx_waiting = qemu_get_be32(f);

    virtio_net_set_mrg_rx_bufs(n, qemu_get_be32(f),
                               virtio_vdev_has_feature(vdev,
                                                       VIRTIO_F_VERSION_1));

    if (version_id >= 3) {
        n->status = qemu_get_be16(f);
    }

    if (version_id >= 4) {
        /* Versions before 8 stored these two flags as 32-bit values. */
        if (version_id < 8) {
            n->promisc = qemu_get_be32(f);
            n->allmulti = qemu_get_be32(f);
        } else {
            n->promisc = qemu_get_byte(f);
            n->allmulti = qemu_get_byte(f);
        }
    }

    if (version_id >= 5) {
        n->mac_table.in_use = qemu_get_be32(f);
        /* MAC_TABLE_ENTRIES may be different from the saved image */
        if (n->mac_table.in_use <= MAC_TABLE_ENTRIES) {
            qemu_get_buffer(f, n->mac_table.macs,
                            n->mac_table.in_use * ETH_ALEN);
        } else {
            int64_t skipped;

            /* Overflow detected - can happen if source has a larger MAC table.
             * We simply set overflow flag so there's no need to maintain the
             * table of addresses, discard them all.
             * Note: 64 bit math to avoid integer overflow.
             */
            for (skipped = 0;
                 skipped < (int64_t)n->mac_table.in_use * ETH_ALEN;
                 ++skipped) {
                qemu_get_byte(f);
            }
            n->mac_table.multi_overflow = n->mac_table.uni_overflow = 1;
            n->mac_table.in_use = 0;
        }
    }

    if (version_id >= 6) {
        qemu_get_buffer(f, (uint8_t *)n->vlans, MAX_VLAN >> 3);
    }

    if (version_id >= 7) {
        if (qemu_get_be32(f) && !peer_has_vnet_hdr(n)) {
            error_report("virtio-net: saved image requires vnet_hdr=on");
            return -1;
        }
    }

    if (version_id >= 9) {
        n->mac_table.multi_overflow = qemu_get_byte(f);
        n->mac_table.uni_overflow = qemu_get_byte(f);
    }

    if (version_id >= 10) {
        n->alluni = qemu_get_byte(f);
        n->nomulti = qemu_get_byte(f);
        n->nouni = qemu_get_byte(f);
        n->nobcast = qemu_get_byte(f);
    }

    if (version_id >= 11) {
        if (qemu_get_byte(f) && !peer_has_ufo(n)) {
            error_report("virtio-net: saved image requires TUN_F_UFO support");
            return -1;
        }
    }

    if (n->max_queues > 1) {
        if (n->max_queues != qemu_get_be16(f)) {
            error_report("virtio-net: different max_queues ");
            return -1;
        }

        n->curr_queues = qemu_get_be16(f);
        if (n->curr_queues > n->max_queues) {
            error_report("virtio-net: curr_queues %x > max_queues %x",
                         n->curr_queues, n->max_queues);
            return -1;
        }
        /* Queue 0's tx_waiting was already read above. */
        for (i = 1; i < n->curr_queues; i++) {
            n->vqs[i].tx_waiting = qemu_get_be32(f);
        }
    }

    virtio_net_set_queues(n);

    /* Find the first multicast entry in the saved MAC filter */
    for (i = 0; i < n->mac_table.in_use; i++) {
        if (n->mac_table.macs[i * ETH_ALEN] & 1) {
            break;
        }
    }
    n->mac_table.first_multi = i;

    /* nc.link_down can't be migrated, so infer link_down according
     * to link status bit in n->status */
    link_down = (n->status & VIRTIO_NET_S_LINK_UP) == 0;
    for (i = 0; i < n->max_queues; i++) {
        qemu_get_subqueue(n->nic, i)->link_down = link_down;
    }

    return 0;
}

M
Mark McLoughlin 已提交
1604
static NetClientInfo net_virtio_info = {
1605
    .type = NET_CLIENT_OPTIONS_KIND_NIC,
M
Mark McLoughlin 已提交
1606 1607 1608 1609
    .size = sizeof(NICState),
    .can_receive = virtio_net_can_receive,
    .receive = virtio_net_receive,
    .link_status_changed = virtio_net_set_link_status,
1610
    .query_rx_filter = virtio_net_query_rxfilter,
M
Mark McLoughlin 已提交
1611 1612
};

1613 1614
/*
 * Ask the vhost backend whether a guest notification is pending for
 * virtqueue @idx.  Must only be called while vhost is started.
 */
static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    /* vq2q() maps the virtqueue index to its queue-pair index. */
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
    assert(n->vhost_started);
    return vhost_net_virtqueue_pending(get_vhost_net(nc->peer), idx);
}

/*
 * Forward a guest-notifier mask/unmask request for virtqueue @idx to the
 * vhost backend.  Must only be called while vhost is started.
 */
static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
                                           bool mask)
{
    VirtIONet *n = VIRTIO_NET(vdev);
    /* vq2q() maps the virtqueue index to its queue-pair index. */
    NetClientState *nc = qemu_get_subqueue(n->nic, vq2q(idx));
    assert(n->vhost_started);
    vhost_net_virtqueue_mask(get_vhost_net(nc->peer),
                             vdev, idx, mask);
}

G
Gerd Hoffmann 已提交
1631
/*
 * Compute n->config_size from @host_features.
 *
 * The result is the largest "end" offset in feature_sizes[] among the
 * entries whose feature bit is set.  VIRTIO_NET_F_MAC is always treated as
 * present; it is added to the local copy of @host_features only.
 */
static void virtio_net_set_config_size(VirtIONet *n, uint64_t host_features)
{
    int i, config_size = 0;
    virtio_add_feature(&host_features, VIRTIO_NET_F_MAC);
    /* feature_sizes[] is terminated by an entry with flags == 0. */
    for (i = 0; feature_sizes[i].flags != 0; i++) {
        if (host_features & feature_sizes[i].flags) {
            config_size = MAX(feature_sizes[i].end, config_size);
        }
    }
    n->config_size = config_size;
}

1643 1644 1645 1646 1647 1648 1649 1650
/*
 * Set the net client name and type used when the NIC is created at realize
 * time.  Both strings are copied; values set by an earlier call are freed.
 *
 * @name may be NULL, @type must not be.
 */
void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
                                   const char *type)
{
    /*
     * The name can be NULL, the netclient name will be type.x.
     */
    assert(type != NULL);

    g_free(n->netclient_name);
    g_free(n->netclient_type);
    n->netclient_name = g_strdup(name);
    n->netclient_type = g_strdup(type);
}

1657
/*
 * Realize the virtio-net device: initialise the virtio core, create the
 * RX/TX queue pairs and the control queue, create the NIC backend, and
 * register the savevm handlers.
 *
 * On an invalid queue count an error is set in @errp and the virtio device
 * is cleaned up again before returning.
 */
static void virtio_net_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    NetClientState *nc;
    int i;

    virtio_net_set_config_size(n, n->host_features);
    virtio_init(vdev, "virtio-net", VIRTIO_ID_NET, n->config_size);

    n->max_queues = MAX(n->nic_conf.peers.queues, 1);
    /* Each pair needs two virtqueues, plus one for the control queue. */
    if (n->max_queues * 2 + 1 > VIRTIO_QUEUE_MAX) {
        error_setg(errp, "Invalid number of queues (= %" PRIu32 "), "
                   "must be a positive integer less than %d.",
                   n->max_queues, (VIRTIO_QUEUE_MAX - 1) / 2);
        virtio_cleanup(vdev);
        return;
    }
    n->vqs = g_malloc0(sizeof(VirtIONetQueue) * n->max_queues);
    n->curr_queues = 1;
    n->tx_timeout = n->net_conf.txtimer;

    /* An unknown tx= value is reported but not fatal. */
    if (n->net_conf.tx && strcmp(n->net_conf.tx, "timer")
                       && strcmp(n->net_conf.tx, "bh")) {
        error_report("virtio-net: "
                     "Unknown option tx=%s, valid options: \"timer\" \"bh\"",
                     n->net_conf.tx);
        error_report("Defaulting to \"bh\"");
    }

    for (i = 0; i < n->max_queues; i++) {
        virtio_net_add_queue(n, i);
    }

    n->ctrl_vq = virtio_add_queue(vdev, 64, virtio_net_handle_ctrl);
    qemu_macaddr_default_if_unset(&n->nic_conf.macaddr);
    memcpy(&n->mac[0], &n->nic_conf.macaddr, sizeof(n->mac));
    n->status = VIRTIO_NET_S_LINK_UP;
    n->announce_timer = timer_new_ms(QEMU_CLOCK_VIRTUAL,
                                     virtio_net_announce_timer, n);

    if (n->netclient_type) {
        /*
         * Happen when virtio_net_set_netclient_name has been called.
         */
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              n->netclient_type, n->netclient_name, n);
    } else {
        n->nic = qemu_new_nic(&net_virtio_info, &n->nic_conf,
                              object_get_typename(OBJECT(dev)), dev->id, n);
    }

    peer_test_vnet_hdr(n);
    if (peer_has_vnet_hdr(n)) {
        for (i = 0; i < n->max_queues; i++) {
            qemu_using_vnet_hdr(qemu_get_subqueue(n->nic, i)->peer, true);
        }
        n->host_hdr_len = sizeof(struct virtio_net_hdr);
    } else {
        n->host_hdr_len = 0;
    }

    qemu_format_nic_info_str(qemu_get_queue(n->nic), n->nic_conf.macaddr.a);

    n->vqs[0].tx_waiting = 0;
    n->tx_burst = n->net_conf.txburst;
    virtio_net_set_mrg_rx_bufs(n, 0, 0);
    n->promisc = 1; /* for compatibility */

    n->mac_table.macs = g_malloc0(MAC_TABLE_ENTRIES * ETH_ALEN);

    /* One bit per VLAN id. */
    n->vlans = g_malloc0(MAX_VLAN >> 3);

    nc = qemu_get_queue(n->nic);
    nc->rxfilter_notify_enabled = 1;

    n->qdev = dev;
    register_savevm(dev, "virtio-net", -1, VIRTIO_NET_VM_VERSION,
                    virtio_net_save, virtio_net_load, n);
}

1738
/*
 * Undo virtio_net_device_realize(): stop the backend, unregister the savevm
 * handlers, and release the queues, timers, filter tables and NIC.
 */
static void virtio_net_device_unrealize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtIONet *n = VIRTIO_NET(dev);
    int i, max_queues;

    /* This will stop vhost backend if appropriate. */
    virtio_net_set_status(vdev, 0);

    unregister_savevm(dev, "virtio-net", n);

    g_free(n->netclient_name);
    n->netclient_name = NULL;
    g_free(n->netclient_type);
    n->netclient_type = NULL;

    g_free(n->mac_table.macs);
    g_free(n->vlans);

    /* Delete n->max_queues pairs in multiqueue mode, otherwise just one. */
    max_queues = n->multiqueue ? n->max_queues : 1;
    for (i = 0; i < max_queues; i++) {
        virtio_net_del_queue(n, i);
    }

    timer_del(n->announce_timer);
    timer_free(n->announce_timer);
    g_free(n->vqs);
    qemu_del_nic(n->nic);
    virtio_cleanup(vdev);
}

/*
 * Instance initializer for the virtio-net type.
 *
 * Sets config_size to its default, the size of the whole
 * struct virtio_net_config (it can be overridden with
 * virtio_net_set_config_size), and adds the "bootindex" property, which is
 * backed by nic_conf.bootindex.
 */
static void virtio_net_instance_init(Object *obj)
{
    VirtIONet *net = VIRTIO_NET(obj);
    DeviceState *dev = DEVICE(net);

    net->config_size = sizeof(struct virtio_net_config);

    device_add_bootindex_property(obj,
                                  &net->nic_conf.bootindex,
                                  "bootindex",
                                  "/ethernet-phy@0",
                                  dev,
                                  NULL);
}

static Property virtio_net_properties[] = {
1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822
    DEFINE_PROP_BIT("csum", VirtIONet, host_features, VIRTIO_NET_F_CSUM, true),
    DEFINE_PROP_BIT("guest_csum", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_CSUM, true),
    DEFINE_PROP_BIT("gso", VirtIONet, host_features, VIRTIO_NET_F_GSO, true),
    DEFINE_PROP_BIT("guest_tso4", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_TSO4, true),
    DEFINE_PROP_BIT("guest_tso6", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_TSO6, true),
    DEFINE_PROP_BIT("guest_ecn", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_ECN, true),
    DEFINE_PROP_BIT("guest_ufo", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_UFO, true),
    DEFINE_PROP_BIT("guest_announce", VirtIONet, host_features,
                    VIRTIO_NET_F_GUEST_ANNOUNCE, true),
    DEFINE_PROP_BIT("host_tso4", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_TSO4, true),
    DEFINE_PROP_BIT("host_tso6", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_TSO6, true),
    DEFINE_PROP_BIT("host_ecn", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_ECN, true),
    DEFINE_PROP_BIT("host_ufo", VirtIONet, host_features,
                    VIRTIO_NET_F_HOST_UFO, true),
    DEFINE_PROP_BIT("mrg_rxbuf", VirtIONet, host_features,
                    VIRTIO_NET_F_MRG_RXBUF, true),
    DEFINE_PROP_BIT("status", VirtIONet, host_features,
                    VIRTIO_NET_F_STATUS, true),
    DEFINE_PROP_BIT("ctrl_vq", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_VQ, true),
    DEFINE_PROP_BIT("ctrl_rx", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_RX, true),
    DEFINE_PROP_BIT("ctrl_vlan", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_VLAN, true),
    DEFINE_PROP_BIT("ctrl_rx_extra", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_RX_EXTRA, true),
    DEFINE_PROP_BIT("ctrl_mac_addr", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_MAC_ADDR, true),
    DEFINE_PROP_BIT("ctrl_guest_offloads", VirtIONet, host_features,
                    VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, true),
    DEFINE_PROP_BIT("mq", VirtIONet, host_features, VIRTIO_NET_F_MQ, false),
1823 1824
    DEFINE_NIC_PROPERTIES(VirtIONet, nic_conf),
    DEFINE_PROP_UINT32("x-txtimer", VirtIONet, net_conf.txtimer,
1825
                       TX_TIMER_INTERVAL),
1826 1827 1828 1829 1830 1831 1832 1833 1834
    DEFINE_PROP_INT32("x-txburst", VirtIONet, net_conf.txburst, TX_BURST),
    DEFINE_PROP_STRING("tx", VirtIONet, net_conf.tx),
    DEFINE_PROP_END_OF_LIST(),
};

/*
 * Class initializer for the virtio-net type.
 *
 * Installs the property table and the network device category on the
 * DeviceClass, then fills in the VirtioDeviceClass callbacks with the
 * virtio-net implementations.
 *
 * @klass: the class being initialized.
 * @data:  unused.
 */
static void virtio_net_class_init(ObjectClass *klass, void *data)
{
    VirtioDeviceClass *virtio_class = VIRTIO_DEVICE_CLASS(klass);
    DeviceClass *device_class = DEVICE_CLASS(klass);

    /* Generic device class setup. */
    device_class->props = virtio_net_properties;
    set_bit(DEVICE_CATEGORY_NETWORK, device_class->categories);

    /* Lifecycle. */
    virtio_class->realize = virtio_net_device_realize;
    virtio_class->unrealize = virtio_net_device_unrealize;
    virtio_class->reset = virtio_net_reset;
    virtio_class->set_status = virtio_net_set_status;

    /* Config space access. */
    virtio_class->get_config = virtio_net_get_config;
    virtio_class->set_config = virtio_net_set_config;

    /* Feature negotiation. */
    virtio_class->get_features = virtio_net_get_features;
    virtio_class->set_features = virtio_net_set_features;
    virtio_class->bad_features = virtio_net_bad_features;

    /* Guest notifiers. */
    virtio_class->guest_notifier_mask = virtio_net_guest_notifier_mask;
    virtio_class->guest_notifier_pending = virtio_net_guest_notifier_pending;

    /* Device state load/save hooks. */
    virtio_class->load = virtio_net_load_device;
    virtio_class->save = virtio_net_save_device;
}

/*
 * Type description for virtio-net: a TYPE_VIRTIO_DEVICE subtype whose
 * instances are VirtIONet structures, set up by the class and instance
 * initializers named here.
 */
static const TypeInfo virtio_net_info = {
    .name          = TYPE_VIRTIO_NET,
    .parent        = TYPE_VIRTIO_DEVICE,
    .class_init    = virtio_net_class_init,
    .instance_init = virtio_net_instance_init,
    .instance_size = sizeof(VirtIONet),
};

/* Register the virtio-net type described by virtio_net_info. */
static void virtio_register_types(void)
{
    type_register_static(&virtio_net_info);
}

/* Hand the registration function above to type_init(). */
type_init(virtio_register_types)