/*
 * Virtio Support
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu-common.h"
#include "cpu.h"
#include "trace.h"
#include "exec/address-spaces.h"
#include "qemu/error-report.h"
#include "hw/virtio/virtio.h"
#include "qemu/atomic.h"
#include "hw/virtio/virtio-bus.h"
#include "migration/migration.h"
#include "hw/virtio/virtio-access.h"
#include "sysemu/dma.h"

/*
 * The alignment to use between consumer and producer parts of vring.
 * x86 pagesize again. This is the default, used by transports like PCI
 * which don't provide a means for the guest to tell the host the alignment.
 */
#define VIRTIO_PCI_VRING_ALIGN         4096

typedef struct VRingDesc
{
    uint64_t addr;
    uint32_t len;
    uint16_t flags;
    uint16_t next;
} VRingDesc;

typedef struct VRingAvail
{
    uint16_t flags;
    uint16_t idx;
    uint16_t ring[0];
} VRingAvail;

typedef struct VRingUsedElem
{
    uint32_t id;
    uint32_t len;
} VRingUsedElem;

typedef struct VRingUsed
{
    uint16_t flags;
    uint16_t idx;
    VRingUsedElem ring[0];
} VRingUsed;

typedef struct VRing
{
    unsigned int num;
    unsigned int num_default;
    unsigned int align;
    hwaddr desc;
    hwaddr avail;
    hwaddr used;
} VRing;

struct VirtQueue
{
    VRing vring;

    /* Next head to pop */
    uint16_t last_avail_idx;

    /* Last avail_idx read from VQ. */
    uint16_t shadow_avail_idx;

    uint16_t used_idx;

    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Whether signalled_used is valid */
    bool signalled_used_valid;

    /* Nested host->guest notification disabled counter */
    unsigned int notification_disabled;

    uint16_t queue_index;

    unsigned int inuse;

    uint16_t vector;
    VirtIOHandleOutput handle_output;
    VirtIOHandleOutput handle_aio_output;
    VirtIODevice *vdev;
    EventNotifier guest_notifier;
    EventNotifier host_notifier;
    QLIST_ENTRY(VirtQueue) node;
};

/* virt queue functions */
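/* virtio_queue_update_rings:
 * @vdev: The #VirtIODevice
 * @n: queue index
 *
 * Recompute the avail and used ring addresses of queue @n from the
 * descriptor table address, ring size and alignment.
 */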
void virtio_queue_update_rings(VirtIODevice *vdev, int n)
{
    VRing *vring = &vdev->vq[n].vring;

    if (!vring->desc) {
        /* not yet setup -> nothing to do */
        return;
    }
    vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
    vring->used = vring_align(vring->avail +
                              offsetof(VRingAvail, ring[vring->num]),
                              vring->align);
}

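/* Read one descriptor from guest memory and convert it to host endianness. */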
static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc,
                            hwaddr desc_pa, int i)
{
    address_space_read(vdev->dma_as, desc_pa + i * sizeof(VRingDesc),
                       MEMTXATTRS_UNSPECIFIED, (void *)desc, sizeof(VRingDesc));
    virtio_tswap64s(vdev, &desc->addr);
    virtio_tswap32s(vdev, &desc->len);
    virtio_tswap16s(vdev, &desc->flags);
    virtio_tswap16s(vdev, &desc->next);
}

static inline uint16_t vring_avail_flags(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, flags);
    return virtio_lduw_phys(vq->vdev, pa);
}

static inline uint16_t vring_avail_idx(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, idx);
    vq->shadow_avail_idx = virtio_lduw_phys(vq->vdev, pa);
    return vq->shadow_avail_idx;
}

static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
    return virtio_lduw_phys(vq->vdev, pa);
}

static inline uint16_t vring_get_used_event(VirtQueue *vq)
{
    return vring_avail_ring(vq, vq->vring.num);
}

static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
                                    int i)
{
    hwaddr pa;
    virtio_tswap32s(vq->vdev, &uelem->id);
    virtio_tswap32s(vq->vdev, &uelem->len);
    pa = vq->vring.used + offsetof(VRingUsed, ring[i]);
    address_space_write(vq->vdev->dma_as, pa, MEMTXATTRS_UNSPECIFIED,
                       (void *)uelem, sizeof(VRingUsedElem));
}

static uint16_t vring_used_idx(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    return virtio_lduw_phys(vq->vdev, pa);
}

static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    virtio_stw_phys(vq->vdev, pa, val);
    vq->used_idx = val;
}

static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
{
    VirtIODevice *vdev = vq->vdev;
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) | mask);
}

static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
{
    VirtIODevice *vdev = vq->vdev;
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) & ~mask);
}

static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
{
    hwaddr pa;
    if (vq->notification_disabled) {
        return;
    }
    pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
    virtio_stw_phys(vq->vdev, pa, val);
}

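/* virtio_queue_set_notification:
 * @vq: The #VirtQueue
 * @enable: nonzero to enable, zero to disable guest->host notifications
 *
 * Disable requests nest: each disable increments a counter that a later
 * enable decrements.  Depending on VIRTIO_RING_F_EVENT_IDX this either
 * updates the avail event index or toggles VRING_USED_F_NO_NOTIFY.
 */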
void virtio_queue_set_notification(VirtQueue *vq, int enable)
{
    if (enable) {
        assert(vq->notification_disabled > 0);
        vq->notification_disabled--;
    } else {
        vq->notification_disabled++;
    }

    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vring_avail_idx(vq));
    } else if (enable) {
        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
    } else {
        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
    }
    if (enable) {
        /* Expose avail event/used flags before caller checks the avail idx. */
        smp_mb();
    }
}

int virtio_queue_ready(VirtQueue *vq)
{
    return vq->vring.avail != 0;
}

/* Fetch avail_idx from VQ memory only when we really need to know if
 * the guest has added some buffers. */
int virtio_queue_empty(VirtQueue *vq)
{
    if (vq->shadow_avail_idx != vq->last_avail_idx) {
        return 0;
    }

    return vring_avail_idx(vq) == vq->last_avail_idx;
}

static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
                               unsigned int len)
{
    AddressSpace *dma_as = vq->vdev->dma_as;
    unsigned int offset;
    int i;

    offset = 0;
    for (i = 0; i < elem->in_num; i++) {
        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);

        dma_memory_unmap(dma_as, elem->in_sg[i].iov_base,
                         elem->in_sg[i].iov_len,
                         DMA_DIRECTION_FROM_DEVICE, size);

        offset += size;
    }

    for (i = 0; i < elem->out_num; i++) {
        dma_memory_unmap(dma_as, elem->out_sg[i].iov_base,
                         elem->out_sg[i].iov_len,
                         DMA_DIRECTION_TO_DEVICE,
                         elem->out_sg[i].iov_len);
    }
}

/* virtqueue_detach_element:
 * @vq: The #VirtQueue
 * @elem: The #VirtQueueElement
 * @len: number of bytes written
 *
 * Detach the element from the virtqueue.  This function is suitable for device
 * reset or other situations where a #VirtQueueElement is simply freed and will
 * not be pushed or discarded.
 */
void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
                              unsigned int len)
{
    vq->inuse--;
    virtqueue_unmap_sg(vq, elem, len);
}

/* virtqueue_unpop:
 * @vq: The #VirtQueue
 * @elem: The #VirtQueueElement
 * @len: number of bytes written
 *
 * Pretend the most recent element wasn't popped from the virtqueue.  The next
 * call to virtqueue_pop() will refetch the element.
 */
void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
                     unsigned int len)
{
    vq->last_avail_idx--;
    virtqueue_detach_element(vq, elem, len);
}

/* virtqueue_rewind:
 * @vq: The #VirtQueue
 * @num: Number of elements to push back
 *
 * Pretend that elements weren't popped from the virtqueue.  The next
 * virtqueue_pop() will refetch the oldest element.
 *
 * Use virtqueue_unpop() instead if you have a VirtQueueElement.
 *
 * Returns: true on success, false if @num is greater than the number of in use
 * elements.
 */
bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
{
    if (num > vq->inuse) {
        return false;
    }
    vq->last_avail_idx -= num;
    vq->inuse -= num;
    return true;
}

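/* virtqueue_fill:
 * @vq: The #VirtQueue
 * @elem: The #VirtQueueElement being returned to the ring
 * @len: number of bytes written into the element's in buffers
 * @idx: offset from the current used index at which to place the entry
 *
 * Unmap the element and write its used ring entry.  The entry only becomes
 * visible to the guest after a later virtqueue_flush(); virtqueue_push()
 * combines both steps for the common single-element case.
 */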
void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len, unsigned int idx)
{
    VRingUsedElem uelem;

    trace_virtqueue_fill(vq, elem, len, idx);

    virtqueue_unmap_sg(vq, elem, len);

    if (unlikely(vq->vdev->broken)) {
        return;
    }

    idx = (idx + vq->used_idx) % vq->vring.num;

    uelem.id = elem->index;
    uelem.len = len;
    vring_used_write(vq, &uelem, idx);
}

void virtqueue_flush(VirtQueue *vq, unsigned int count)
{
    uint16_t old, new;

    if (unlikely(vq->vdev->broken)) {
        vq->inuse -= count;
        return;
    }

    /* Make sure buffer is written before we update index. */
    smp_wmb();
    trace_virtqueue_flush(vq, count);
    old = vq->used_idx;
    new = old + count;
    vring_used_idx_set(vq, new);
    vq->inuse -= count;
    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old))) {
        vq->signalled_used_valid = false;
    }
}

void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len)
{
    virtqueue_fill(vq, elem, len, 0);
    virtqueue_flush(vq, 1);
}

static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
{
    uint16_t num_heads = vring_avail_idx(vq) - idx;

    /* Check it isn't doing very strange things with descriptor numbers. */
    if (num_heads > vq->vring.num) {
        virtio_error(vq->vdev, "Guest moved used index from %u to %u",
                     idx, vq->shadow_avail_idx);
        return -EINVAL;
    }
    /* On success, callers read a descriptor at vq->last_avail_idx.
     * Make sure descriptor read does not bypass avail index read. */
    if (num_heads) {
        smp_rmb();
    }

    return num_heads;
}

static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
                               unsigned int *head)
{
    /* Grab the next descriptor number they're advertising, and increment
     * the index we've seen. */
    *head = vring_avail_ring(vq, idx % vq->vring.num);

    /* If their number is silly, that's a fatal mistake. */
    if (*head >= vq->vring.num) {
        virtio_error(vq->vdev, "Guest says index %u is available", *head);
        return false;
    }

    return true;
}

enum {
    VIRTQUEUE_READ_DESC_ERROR = -1,
    VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
    VIRTQUEUE_READ_DESC_MORE = 1,   /* more buffers in chain */
};

static int virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
                                    hwaddr desc_pa, unsigned int max,
                                    unsigned int *next)
{
    /* If this descriptor says it doesn't chain, we're done. */
    if (!(desc->flags & VRING_DESC_F_NEXT)) {
        return VIRTQUEUE_READ_DESC_DONE;
    }

    /* Check they're not leading us off end of descriptors. */
    *next = desc->next;
    /* Make sure compiler knows to grab that: we don't want it changing! */
    smp_wmb();

    if (*next >= max) {
        virtio_error(vdev, "Desc next is %u", *next);
        return VIRTQUEUE_READ_DESC_ERROR;
    }

    vring_desc_read(vdev, desc, desc_pa, *next);
    return VIRTQUEUE_READ_DESC_MORE;
}

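/* virtqueue_get_avail_bytes:
 * @vq: The #VirtQueue
 * @in_bytes: set to the total length of device-writable buffers, if non-NULL
 * @out_bytes: set to the total length of device-readable buffers, if non-NULL
 * @max_in_bytes, @max_out_bytes: stop counting once both totals reach these
 *
 * Walk the avail ring, following indirect descriptor tables, and sum up the
 * buffer space the guest has queued, without popping any elements.
 */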
void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
                               unsigned int *out_bytes,
                               unsigned max_in_bytes, unsigned max_out_bytes)
{
    unsigned int idx;
    unsigned int total_bufs, in_total, out_total;
    int rc;

    idx = vq->last_avail_idx;

    total_bufs = in_total = out_total = 0;
    while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
        VirtIODevice *vdev = vq->vdev;
        unsigned int max, num_bufs, indirect = 0;
        VRingDesc desc;
        hwaddr desc_pa;
        unsigned int i;

        max = vq->vring.num;
        num_bufs = total_bufs;

        if (!virtqueue_get_head(vq, idx++, &i)) {
            goto err;
        }

        desc_pa = vq->vring.desc;
        vring_desc_read(vdev, &desc, desc_pa, i);

        if (desc.flags & VRING_DESC_F_INDIRECT) {
            if (desc.len % sizeof(VRingDesc)) {
                virtio_error(vdev, "Invalid size for indirect buffer table");
                goto err;
            }

            /* If we've got too many, that implies a descriptor loop. */
            if (num_bufs >= max) {
                virtio_error(vdev, "Looped descriptor");
                goto err;
            }

            /* loop over the indirect descriptor table */
            indirect = 1;
            max = desc.len / sizeof(VRingDesc);
            desc_pa = desc.addr;
            num_bufs = i = 0;
            vring_desc_read(vdev, &desc, desc_pa, i);
        }

        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > max) {
                virtio_error(vdev, "Looped descriptor");
                goto err;
            }

            if (desc.flags & VRING_DESC_F_WRITE) {
                in_total += desc.len;
            } else {
                out_total += desc.len;
            }
            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
                goto done;
            }

            rc = virtqueue_read_next_desc(vdev, &desc, desc_pa, max, &i);
        } while (rc == VIRTQUEUE_READ_DESC_MORE);

        if (rc == VIRTQUEUE_READ_DESC_ERROR) {
            goto err;
        }

    if (!indirect) {
        total_bufs = num_bufs;
    } else {
        total_bufs++;
    }
    }

    if (rc < 0) {
        goto err;
    }

done:
    if (in_bytes) {
        *in_bytes = in_total;
    }
    if (out_bytes) {
        *out_bytes = out_total;
    }
    return;

err:
    in_total = out_total = 0;
    goto done;
}

int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
                          unsigned int out_bytes)
{
    unsigned int in_total, out_total;

    virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
    return in_bytes <= in_total && out_bytes <= out_total;
}

static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
                               hwaddr *addr, struct iovec *iov,
                               unsigned int max_num_sg, bool is_write,
                               hwaddr pa, size_t sz)
{
    bool ok = false;
    unsigned num_sg = *p_num_sg;
    assert(num_sg <= max_num_sg);

    if (!sz) {
        virtio_error(vdev, "virtio: zero sized buffers are not allowed");
        goto out;
    }

    while (sz) {
        hwaddr len = sz;

        if (num_sg == max_num_sg) {
            virtio_error(vdev, "virtio: too many write descriptors in "
                               "indirect table");
            goto out;
        }

        iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len,
                                              is_write ?
                                              DMA_DIRECTION_FROM_DEVICE :
                                              DMA_DIRECTION_TO_DEVICE);
        if (!iov[num_sg].iov_base) {
            virtio_error(vdev, "virtio: bogus descriptor or out of resources");
            goto out;
        }

        iov[num_sg].iov_len = len;
        addr[num_sg] = pa;

        sz -= len;
        pa += len;
        num_sg++;
    }
    ok = true;

out:
    *p_num_sg = num_sg;
    return ok;
}

/* Only used by error code paths before we have a VirtQueueElement (therefore
 * virtqueue_unmap_sg() can't be used).  Assumes buffers weren't written to
 * yet.
 */
static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
                                    struct iovec *iov)
{
    unsigned int i;

    for (i = 0; i < out_num + in_num; i++) {
        int is_write = i >= out_num;

        cpu_physical_memory_unmap(iov->iov_base, iov->iov_len, is_write, 0);
        iov++;
    }
}

static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
                                hwaddr *addr, unsigned int *num_sg,
                                unsigned int max_size, int is_write)
{
    unsigned int i;
    hwaddr len;

    /* Note: this function MUST validate input, some callers
     * are passing in num_sg values received over the network.
     */
    /* TODO: teach all callers that this can fail, and return failure instead
     * of asserting here.
     * When we do, we might be able to re-enable NDEBUG below.
     */
#ifdef NDEBUG
#error building with NDEBUG is not supported
#endif
    assert(*num_sg <= max_size);

    for (i = 0; i < *num_sg; i++) {
        len = sg[i].iov_len;
        sg[i].iov_base = dma_memory_map(vdev->dma_as,
                                        addr[i], &len, is_write ?
                                        DMA_DIRECTION_FROM_DEVICE :
                                        DMA_DIRECTION_TO_DEVICE);
        if (!sg[i].iov_base) {
            error_report("virtio: error trying to map MMIO memory");
            exit(1);
        }
        if (len != sg[i].iov_len) {
            error_report("virtio: unexpected memory split");
            exit(1);
        }
    }
}

void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
{
    virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, &elem->in_num,
                        MIN(ARRAY_SIZE(elem->in_sg), ARRAY_SIZE(elem->in_addr)),
                        1);
    virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, &elem->out_num,
                        MIN(ARRAY_SIZE(elem->out_sg),
                        ARRAY_SIZE(elem->out_addr)),
                        0);
}

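/* Allocate a VirtQueueElement (or a larger device-specific structure of size
 * @sz that embeds one) together with its in/out address and iovec arrays in
 * a single allocation, and point the element's array members into it. */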
static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
{
    VirtQueueElement *elem;
    size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
    size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
    size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
    size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
    size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
    size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);

    assert(sz >= sizeof(VirtQueueElement));
    elem = g_malloc(out_sg_end);
    elem->out_num = out_num;
    elem->in_num = in_num;
    elem->in_addr = (void *)elem + in_addr_ofs;
    elem->out_addr = (void *)elem + out_addr_ofs;
    elem->in_sg = (void *)elem + in_sg_ofs;
    elem->out_sg = (void *)elem + out_sg_ofs;
    return elem;
}

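/* virtqueue_pop:
 * @vq: The #VirtQueue
 * @sz: allocation size, at least sizeof(VirtQueueElement)
 *
 * Fetch the next available element from the ring, map its descriptors into
 * host memory and return it.  Returns NULL if the queue is empty, the device
 * is broken, or the guest supplied invalid descriptors.
 *
 * Typical device usage (sketch):
 *
 *     VirtQueueElement *elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
 *     if (!elem) {
 *         return;
 *     }
 *     ... read elem->out_sg / fill elem->in_sg ...
 *     virtqueue_push(vq, elem, len);
 *     virtio_notify(vdev, vq);
 *     g_free(elem);
 */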
void *virtqueue_pop(VirtQueue *vq, size_t sz)
{
    unsigned int i, head, max;
    hwaddr desc_pa = vq->vring.desc;
    VirtIODevice *vdev = vq->vdev;
    VirtQueueElement *elem;
    unsigned out_num, in_num;
    hwaddr addr[VIRTQUEUE_MAX_SIZE];
    struct iovec iov[VIRTQUEUE_MAX_SIZE];
    VRingDesc desc;
    int rc;

    if (unlikely(vdev->broken)) {
        return NULL;
    }
    if (virtio_queue_empty(vq)) {
        return NULL;
    }
    /* Needed after virtio_queue_empty(), see comment in
     * virtqueue_num_heads(). */
    smp_rmb();

    /* When we start there are none of either input nor output. */
    out_num = in_num = 0;

    max = vq->vring.num;

    if (vq->inuse >= vq->vring.num) {
        virtio_error(vdev, "Virtqueue size exceeded");
        return NULL;
    }

    if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
        return NULL;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vq->last_avail_idx);
    }

    i = head;
    vring_desc_read(vdev, &desc, desc_pa, i);
    if (desc.flags & VRING_DESC_F_INDIRECT) {
        if (desc.len % sizeof(VRingDesc)) {
            virtio_error(vdev, "Invalid size for indirect buffer table");
            return NULL;
        }

        /* loop over the indirect descriptor table */
        max = desc.len / sizeof(VRingDesc);
        desc_pa = desc.addr;
        i = 0;
        vring_desc_read(vdev, &desc, desc_pa, i);
    }

    /* Collect all the descriptors */
    do {
        bool map_ok;

        if (desc.flags & VRING_DESC_F_WRITE) {
            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
                                        iov + out_num,
                                        VIRTQUEUE_MAX_SIZE - out_num, true,
                                        desc.addr, desc.len);
        } else {
            if (in_num) {
                virtio_error(vdev, "Incorrect order for descriptors");
                goto err_undo_map;
            }
            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
                                        VIRTQUEUE_MAX_SIZE, false,
                                        desc.addr, desc.len);
        }
        if (!map_ok) {
            goto err_undo_map;
        }

        /* If we've got too many, that implies a descriptor loop. */
        if ((in_num + out_num) > max) {
            virtio_error(vdev, "Looped descriptor");
            goto err_undo_map;
        }

        rc = virtqueue_read_next_desc(vdev, &desc, desc_pa, max, &i);
    } while (rc == VIRTQUEUE_READ_DESC_MORE);

    if (rc == VIRTQUEUE_READ_DESC_ERROR) {
        goto err_undo_map;
    }

    /* Now copy what we have collected and mapped */
    elem = virtqueue_alloc_element(sz, out_num, in_num);
    elem->index = head;
    for (i = 0; i < out_num; i++) {
        elem->out_addr[i] = addr[i];
        elem->out_sg[i] = iov[i];
    }
    for (i = 0; i < in_num; i++) {
        elem->in_addr[i] = addr[out_num + i];
        elem->in_sg[i] = iov[out_num + i];
    }

    vq->inuse++;

    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
    return elem;

err_undo_map:
    virtqueue_undo_map_desc(out_num, in_num, iov);
    return NULL;
}

/* Reading and writing a structure directly to QEMUFile is *awful*, but
 * it is what QEMU has always done by mistake.  We can change it sooner
 * or later by bumping the version number of the affected vm states.
 * In the meanwhile, since the in-memory layout of VirtQueueElement
 * has changed, we need to marshal to and from the layout that was
 * used before the change.
 */
typedef struct VirtQueueElementOld {
    unsigned int index;
    unsigned int out_num;
    unsigned int in_num;
    hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
    hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
    struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
    struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
} VirtQueueElementOld;

void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
{
    VirtQueueElement *elem;
    VirtQueueElementOld data;
    int i;

    qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));

    elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
    elem->index = data.index;

    for (i = 0; i < elem->in_num; i++) {
        elem->in_addr[i] = data.in_addr[i];
    }

    for (i = 0; i < elem->out_num; i++) {
        elem->out_addr[i] = data.out_addr[i];
    }

    for (i = 0; i < elem->in_num; i++) {
        /* Base is overwritten by virtqueue_map.  */
        elem->in_sg[i].iov_base = 0;
        elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
    }

    for (i = 0; i < elem->out_num; i++) {
        /* Base is overwritten by virtqueue_map.  */
        elem->out_sg[i].iov_base = 0;
        elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
    }

    virtqueue_map(vdev, elem);
    return elem;
}

void qemu_put_virtqueue_element(QEMUFile *f, VirtQueueElement *elem)
{
    VirtQueueElementOld data;
    int i;

    memset(&data, 0, sizeof(data));
    data.index = elem->index;
    data.in_num = elem->in_num;
    data.out_num = elem->out_num;

    for (i = 0; i < elem->in_num; i++) {
        data.in_addr[i] = elem->in_addr[i];
    }

    for (i = 0; i < elem->out_num; i++) {
        data.out_addr[i] = elem->out_addr[i];
    }

    for (i = 0; i < elem->in_num; i++) {
        /* Base is overwritten by virtqueue_map when loading.  Do not
         * save it, as it would leak the QEMU address space layout.  */
        data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
    }

    for (i = 0; i < elem->out_num; i++) {
        /* Do not save iov_base as above.  */
        data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
    }
    qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
}

/* virtio device */
static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    if (unlikely(vdev->broken)) {
        return;
    }

    if (k->notify) {
        k->notify(qbus->parent, vector);
    }
}

void virtio_update_irq(VirtIODevice *vdev)
{
    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
}

static int virtio_validate_features(VirtIODevice *vdev)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
        return -EFAULT;
    }

    if (k->validate_features) {
        return k->validate_features(vdev);
    } else {
        return 0;
    }
}

int virtio_set_status(VirtIODevice *vdev, uint8_t val)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    trace_virtio_set_status(vdev, val);

    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
            val & VIRTIO_CONFIG_S_FEATURES_OK) {
            int ret = virtio_validate_features(vdev);

            if (ret) {
                return ret;
            }
        }
    }
    if (k->set_status) {
        k->set_status(vdev, val);
    }
    vdev->status = val;
    return 0;
}

bool target_words_bigendian(void);
static enum virtio_device_endian virtio_default_endian(void)
{
    if (target_words_bigendian()) {
        return VIRTIO_DEVICE_ENDIAN_BIG;
    } else {
        return VIRTIO_DEVICE_ENDIAN_LITTLE;
    }
}

static enum virtio_device_endian virtio_current_cpu_endian(void)
{
    CPUClass *cc = CPU_GET_CLASS(current_cpu);

    if (cc->virtio_is_big_endian(current_cpu)) {
        return VIRTIO_DEVICE_ENDIAN_BIG;
    } else {
        return VIRTIO_DEVICE_ENDIAN_LITTLE;
    }
}

void virtio_reset(void *opaque)
{
    VirtIODevice *vdev = opaque;
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    int i;

    virtio_set_status(vdev, 0);
    if (current_cpu) {
        /* Guest initiated reset */
        vdev->device_endian = virtio_current_cpu_endian();
    } else {
        /* System reset */
        vdev->device_endian = virtio_default_endian();
    }

    if (k->reset) {
        k->reset(vdev);
    }

    vdev->broken = false;
    vdev->guest_features = 0;
    vdev->queue_sel = 0;
    vdev->status = 0;
    atomic_set(&vdev->isr, 0);
    vdev->config_vector = VIRTIO_NO_VECTOR;
    virtio_notify_vector(vdev, vdev->config_vector);

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        vdev->vq[i].vring.desc = 0;
        vdev->vq[i].vring.avail = 0;
        vdev->vq[i].vring.used = 0;
        vdev->vq[i].last_avail_idx = 0;
        vdev->vq[i].shadow_avail_idx = 0;
        vdev->vq[i].used_idx = 0;
        virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
        vdev->vq[i].signalled_used = 0;
        vdev->vq[i].signalled_used_valid = false;
        vdev->vq[i].notification_disabled = 0;
        vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
        vdev->vq[i].inuse = 0;
    }
}

uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldub_p(vdev->config + addr);
    return val;
}

uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = lduw_p(vdev->config + addr);
    return val;
}

uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldl_p(vdev->config + addr);
    return val;
}

void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stb_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stw_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stl_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldub_p(vdev->config + addr);
    return val;
}

uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = lduw_le_p(vdev->config + addr);
    return val;
}

uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val;

    if (addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldl_le_p(vdev->config + addr);
    return val;
}

void virtio_config_modern_writeb(VirtIODevice *vdev,
                                 uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stb_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_config_modern_writew(VirtIODevice *vdev,
                                 uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stw_le_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_config_modern_writel(VirtIODevice *vdev,
                                 uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val = data;

    if (addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stl_le_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
{
    vdev->vq[n].vring.desc = addr;
    virtio_queue_update_rings(vdev, n);
}

hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}

void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
                            hwaddr avail, hwaddr used)
{
    vdev->vq[n].vring.desc = desc;
    vdev->vq[n].vring.avail = avail;
    vdev->vq[n].vring.used = used;
}

void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
{
    /* Don't allow guest to flip queue between existent and
     * nonexistent states, or to set it to an invalid size.
     */
    if (!!num != !!vdev->vq[n].vring.num ||
        num > VIRTQUEUE_MAX_SIZE ||
        num < 0) {
        return;
    }
    vdev->vq[n].vring.num = num;
}

VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
{
    return QLIST_FIRST(&vdev->vector_queues[vector]);
}

VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
{
    return QLIST_NEXT(vq, node);
}

int virtio_queue_get_num(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.num;
}

int virtio_get_num_queues(VirtIODevice *vdev)
{
    int i;

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (!virtio_queue_get_num(vdev, i)) {
            break;
        }
    }

    return i;
}

void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    /* virtio-1 compliant devices cannot change the alignment */
    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        error_report("tried to modify queue alignment for virtio-1 device");
        return;
    }
    /* Check that the transport told us it was going to do this
     * (so a buggy transport will immediately assert rather than
     * silently failing to migrate this state)
     */
    assert(k->has_variable_vring_alignment);

    vdev->vq[n].vring.align = align;
    virtio_queue_update_rings(vdev, n);
}

static void virtio_queue_notify_aio_vq(VirtQueue *vq)
{
    if (vq->vring.desc && vq->handle_aio_output) {
        VirtIODevice *vdev = vq->vdev;

        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
        vq->handle_aio_output(vdev, vq);
    }
}

static void virtio_queue_notify_vq(VirtQueue *vq)
{
    if (vq->vring.desc && vq->handle_output) {
        VirtIODevice *vdev = vq->vdev;

        if (unlikely(vdev->broken)) {
            return;
        }

        trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
        vq->handle_output(vdev, vq);
    }
}

void virtio_queue_notify(VirtIODevice *vdev, int n)
{
    virtio_queue_notify_vq(&vdev->vq[n]);
}

uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
{
    return n < VIRTIO_QUEUE_MAX ? vdev->vq[n].vector :
        VIRTIO_NO_VECTOR;
}

void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
{
    VirtQueue *vq = &vdev->vq[n];

    if (n < VIRTIO_QUEUE_MAX) {
        if (vdev->vector_queues &&
            vdev->vq[n].vector != VIRTIO_NO_VECTOR) {
            QLIST_REMOVE(vq, node);
        }
        vdev->vq[n].vector = vector;
        if (vdev->vector_queues &&
            vector != VIRTIO_NO_VECTOR) {
            QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
        }
    }
}

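/* Allocate the first unused virtqueue slot of @vdev and configure it with
 * @queue_size entries and the given output handler.  Aborts if no slot is
 * free or the size exceeds VIRTQUEUE_MAX_SIZE. */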
VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                            VirtIOHandleOutput handle_output)
{
    int i;

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0) {
            break;
        }
    }

    if (i == VIRTIO_QUEUE_MAX || queue_size > VIRTQUEUE_MAX_SIZE) {
        abort();
    }

    vdev->vq[i].vring.num = queue_size;
    vdev->vq[i].vring.num_default = queue_size;
    vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
    vdev->vq[i].handle_output = handle_output;
    vdev->vq[i].handle_aio_output = NULL;

    return &vdev->vq[i];
}

void virtio_del_queue(VirtIODevice *vdev, int n)
{
    if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
        abort();
    }

    vdev->vq[n].vring.num = 0;
    vdev->vq[n].vring.num_default = 0;
}

static void virtio_set_isr(VirtIODevice *vdev, int value)
{
    uint8_t old = atomic_read(&vdev->isr);

    /* Do not write ISR if it does not change, so that its cacheline remains
     * shared in the common case where the guest does not read it.
     */
    if ((old & value) != value) {
        atomic_or(&vdev->isr, value);
    }
}

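/* Decide whether the guest needs to be interrupted for @vq: always when the
 * queue became empty (with VIRTIO_F_NOTIFY_ON_EMPTY), via the avail ring
 * flags when VIRTIO_RING_F_EVENT_IDX is not negotiated, and otherwise by
 * comparing the used index against the guest's used_event value. */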
bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    uint16_t old, new;
    bool v;
    /* We need to expose used array entries before checking used event. */
    smp_mb();
    /* Always notify when queue is empty (when feature acknowledge) */
    if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
        !vq->inuse && virtio_queue_empty(vq)) {
        return true;
    }

    if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
    }

    v = vq->signalled_used_valid;
    vq->signalled_used_valid = true;
    old = vq->signalled_used;
    new = vq->signalled_used = vq->used_idx;
    return !v || vring_need_event(vring_get_used_event(vq), new, old);
}

void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
{
    if (!virtio_should_notify(vdev, vq)) {
        return;
    }

    trace_virtio_notify_irqfd(vdev, vq);

    /*
     * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
     * windows drivers included in virtio-win 1.8.0 (circa 2015) are
     * incorrectly polling this bit during crashdump and hibernation
     * in MSI mode, causing a hang if this bit is never updated.
     * Recent releases of Windows do not really shut down, but rather
     * log out and hibernate to make the next startup faster.  Hence,
     * this manifested as a more serious hang during shutdown.
     *
     * Next driver release from 2016 fixed this problem, so working around it
     * is not a must, but it's easy to do so let's do it here.
     *
     * Note: it's safe to update ISR from any thread as it was switched
     * to an atomic operation.
     */
    virtio_set_isr(vq->vdev, 0x1);
    event_notifier_set(&vq->guest_notifier);
}

void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    if (!virtio_should_notify(vdev, vq)) {
        return;
    }

    trace_virtio_notify(vdev, vq);
    virtio_set_isr(vq->vdev, 0x1);
    virtio_notify_vector(vdev, vq->vector);
}

void virtio_notify_config(VirtIODevice *vdev)
{
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return;
    }

    virtio_set_isr(vdev, 0x3);
    vdev->generation++;
    virtio_notify_vector(vdev, vdev->config_vector);
}

static bool virtio_device_endian_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        return vdev->device_endian != virtio_default_endian();
    }
    /* Devices conforming to VIRTIO 1.0 or later are always LE. */
    return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
}

static bool virtio_64bit_features_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    return (vdev->host_features >> 32) != 0;
}

static bool virtio_virtqueue_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
}

static bool virtio_ringsize_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;
    int i;

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
            return true;
        }
    }
    return false;
}

static bool virtio_extra_state_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    return k->has_extra_state &&
        k->has_extra_state(qbus->parent);
}

static bool virtio_broken_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    return vdev->broken;
}

static const VMStateDescription vmstate_virtqueue = {
    .name = "virtqueue_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64(vring.avail, struct VirtQueue),
        VMSTATE_UINT64(vring.used, struct VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_virtqueues = {
    .name = "virtio/virtqueues",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_virtqueue_needed,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
                      VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_ringsize = {
    .name = "ringsize_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(vring.num_default, struct VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_ringsize = {
    .name = "virtio/ringsize",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_ringsize_needed,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
                      VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

static int get_extra_state(QEMUFile *f, void *pv, size_t size)
{
    VirtIODevice *vdev = pv;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    if (!k->load_extra_state) {
        return -1;
    } else {
        return k->load_extra_state(qbus->parent, f);
    }
}

static void put_extra_state(QEMUFile *f, void *pv, size_t size)
{
    VirtIODevice *vdev = pv;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    k->save_extra_state(qbus->parent, f);
}

static const VMStateInfo vmstate_info_extra_state = {
    .name = "virtqueue_extra_state",
    .get = get_extra_state,
    .put = put_extra_state,
};

static const VMStateDescription vmstate_virtio_extra_state = {
    .name = "virtio/extra_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_extra_state_needed,
    .fields = (VMStateField[]) {
        {
            .name         = "extra_state",
            .version_id   = 0,
            .field_exists = NULL,
            .size         = 0,
            .info         = &vmstate_info_extra_state,
            .flags        = VMS_SINGLE,
            .offset       = 0,
        },
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_device_endian = {
    .name = "virtio/device_endian",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_device_endian_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(device_endian, VirtIODevice),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_64bit_features = {
    .name = "virtio/64bit_features",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_64bit_features_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64(guest_features, VirtIODevice),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio_broken = {
    .name = "virtio/broken",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_broken_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(broken, VirtIODevice),
        VMSTATE_END_OF_LIST()
    }
};

static const VMStateDescription vmstate_virtio = {
    .name = "virtio",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .fields = (VMStateField[]) {
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_virtio_device_endian,
        &vmstate_virtio_64bit_features,
        &vmstate_virtio_virtqueues,
        &vmstate_virtio_ringsize,
        &vmstate_virtio_broken,
        &vmstate_virtio_extra_state,
        NULL
    }
};

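/* Serialize the common virtio device state: transport config, status/ISR/
 * queue selector, the low 32 feature bits, the config space, the per-queue
 * state, then the device payload and the subsections of vmstate_virtio. */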
void virtio_save(VirtIODevice *vdev, QEMUFile *f)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
    int i;

    if (k->save_config) {
        k->save_config(qbus->parent, f);
    }

    qemu_put_8s(f, &vdev->status);
    qemu_put_8s(f, &vdev->isr);
    qemu_put_be16s(f, &vdev->queue_sel);
    qemu_put_be32s(f, &guest_features_lo);
    qemu_put_be32(f, vdev->config_len);
    qemu_put_buffer(f, vdev->config, vdev->config_len);

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0) {
            break;
        }
    }

    qemu_put_be32(f, i);

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0) {
            break;
        }

        qemu_put_be32(f, vdev->vq[i].vring.num);
        if (k->has_variable_vring_alignment) {
            qemu_put_be32(f, vdev->vq[i].vring.align);
        }
        /* XXX virtio-1 devices */
        qemu_put_be64(f, vdev->vq[i].vring.desc);
        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
        if (k->save_queue) {
            k->save_queue(qbus->parent, i, f);
        }
    }

    if (vdc->save != NULL) {
        vdc->save(vdev, f);
    }

    if (vdc->vmsd) {
        vmstate_save_state(f, vdc->vmsd, vdev, NULL);
    }

    /* Subsections */
    vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
}

/* A wrapper for use as a VMState .put function */
static void virtio_device_put(QEMUFile *f, void *opaque, size_t size)
{
    virtio_save(VIRTIO_DEVICE(opaque), f);
}

/* A wrapper for use as a VMState .get function */
static int virtio_device_get(QEMUFile *f, void *opaque, size_t size)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
    DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));

    return virtio_load(vdev, f, dc->vmsd->version_id);
}

const VMStateInfo virtio_vmstate_info = {
    .name = "virtio",
    .get = virtio_device_get,
    .put = virtio_device_put,
};
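/*
 * Devices that migrate entirely through VMState typically pull the common
 * virtio state in with a single field that points at this info.  A sketch
 * of such a field (illustrative layout, not a definition from this file):
 *
 *     {
 *         .name  = "virtio",
 *         .info  = &virtio_vmstate_info,
 *         .flags = VMS_SINGLE,
 *     },
 */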

static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    bool bad = (val & ~(vdev->host_features)) != 0;

    val &= vdev->host_features;
    if (k->set_features) {
        k->set_features(vdev, val);
    }
    vdev->guest_features = val;
    return bad ? -1 : 0;
}

int virtio_set_features(VirtIODevice *vdev, uint64_t val)
{
    /*
     * The driver must not attempt to set features after feature negotiation
     * has finished.
     */
    if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
        return -EINVAL;
    }
    return virtio_set_features_nocheck(vdev, val);
}

int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
{
    int i, ret;
    int32_t config_len;
    uint32_t num;
    uint32_t features;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);

    /*
     * We poison the endianness to ensure it does not get used before
     * subsections have been loaded.
     */
    vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;

    if (k->load_config) {
        ret = k->load_config(qbus->parent, f);
        if (ret) {
            return ret;
        }
    }

    qemu_get_8s(f, &vdev->status);
    qemu_get_8s(f, &vdev->isr);
    qemu_get_be16s(f, &vdev->queue_sel);
    if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
        return -1;
    }
    qemu_get_be32s(f, &features);

    /*
     * Temporarily set the low guest_features bits - needed by the virtio-net
     * load code, which tests for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,
     * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
     *
     * Note: in the future, devices should always test host features instead;
     * don't create new dependencies like this.
     */
    vdev->guest_features = features;

    config_len = qemu_get_be32(f);

    /*
     * There are cases where the incoming config can be bigger or smaller
     * than what we have; so load what we have space for, and skip
     * any excess that's in the stream.
     */
    qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));

    while (config_len > vdev->config_len) {
        qemu_get_byte(f);
        config_len--;
    }

    num = qemu_get_be32(f);

    if (num > VIRTIO_QUEUE_MAX) {
        error_report("Invalid number of virtqueues: 0x%x", num);
        return -1;
    }

    for (i = 0; i < num; i++) {
        vdev->vq[i].vring.num = qemu_get_be32(f);
        if (k->has_variable_vring_alignment) {
            vdev->vq[i].vring.align = qemu_get_be32(f);
        }
        vdev->vq[i].vring.desc = qemu_get_be64(f);
        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
        vdev->vq[i].signalled_used_valid = false;
        vdev->vq[i].notification_disabled = 0;

        if (vdev->vq[i].vring.desc) {
            /* XXX virtio-1 devices */
            virtio_queue_update_rings(vdev, i);
        } else if (vdev->vq[i].last_avail_idx) {
            error_report("VQ %d address 0x0 "
                         "inconsistent with Host index 0x%x",
                         i, vdev->vq[i].last_avail_idx);
            return -1;
        }
        if (k->load_queue) {
            ret = k->load_queue(qbus->parent, i, f);
            if (ret) {
                return ret;
            }
        }
    }

    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);

    if (vdc->load != NULL) {
        ret = vdc->load(vdev, f, version_id);
        if (ret) {
            return ret;
        }
    }

    if (vdc->vmsd) {
        ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
        if (ret) {
            return ret;
        }
    }

    /* Subsections */
    ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
    if (ret) {
        return ret;
    }

    if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
        vdev->device_endian = virtio_default_endian();
    }

    if (virtio_64bit_features_needed(vdev)) {
        /*
         * Subsection load filled vdev->guest_features.  Run them
         * through virtio_set_features to sanity-check them against
         * host_features.
         */
        uint64_t features64 = vdev->guest_features;
        if (virtio_set_features_nocheck(vdev, features64) < 0) {
            error_report("Features 0x%" PRIx64 " unsupported. "
                         "Allowed features: 0x%" PRIx64,
                         features64, vdev->host_features);
            return -1;
        }
    } else {
        if (virtio_set_features_nocheck(vdev, features) < 0) {
            error_report("Features 0x%x unsupported. "
                         "Allowed features: 0x%" PRIx64,
                         features, vdev->host_features);
            return -1;
        }
    }

    for (i = 0; i < num; i++) {
        if (vdev->vq[i].vring.desc) {
            uint16_t nheads;
            nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
            /* Check it isn't doing strange things with descriptor numbers. */
            if (nheads > vdev->vq[i].vring.num) {
                error_report("VQ %d size 0x%x Guest index 0x%x "
                             "inconsistent with Host index 0x%x: delta 0x%x",
                             i, vdev->vq[i].vring.num,
                             vring_avail_idx(&vdev->vq[i]),
                             vdev->vq[i].last_avail_idx, nheads);
                return -1;
            }
            vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
            vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);

            /*
             * Some devices migrate VirtQueueElements that have been popped
             * from the avail ring but not yet returned to the used ring.
             * Since max ring size < UINT16_MAX it's safe to use modulo
             * UINT16_MAX + 1 subtraction.
             */
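            /*
             * Illustrative values: last_avail_idx == 0x0002 (after wrapping)
             * and used_idx == 0xfffe give
             * (uint16_t)(0x0002 - 0xfffe) == 4 elements still in flight.
             */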
            vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx -
                                vdev->vq[i].used_idx);
            if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
                error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
                             "used_idx 0x%x",
                             i, vdev->vq[i].vring.num,
                             vdev->vq[i].last_avail_idx,
                             vdev->vq[i].used_idx);
                return -1;
            }
        }
    }

    return 0;
}

void virtio_cleanup(VirtIODevice *vdev)
{
    qemu_del_vm_change_state_handler(vdev->vmstate);
    g_free(vdev->config);
    g_free(vdev->vq);
    g_free(vdev->vector_queues);
}

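/*
 * VM run-state hook: on resume the device status is propagated to the
 * backend before the transport's vmstate_change callback runs; on stop
 * the order is reversed.
 */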
static void virtio_vmstate_change(void *opaque, int running, RunState state)
{
    VirtIODevice *vdev = opaque;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);
    vdev->vm_running = running;

    if (backend_run) {
        virtio_set_status(vdev, vdev->status);
    }

    if (k->vmstate_change) {
        k->vmstate_change(qbus->parent, backend_run);
    }

    if (!backend_run) {
        virtio_set_status(vdev, vdev->status);
    }
}

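/*
 * Helper for transport proxies (e.g. virtio-*-pci): embeds the backend
 * VirtIODevice as the proxy's "virtio-backend" QOM child and forwards the
 * backend's properties to the proxy.
 */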
void virtio_instance_init_common(Object *proxy_obj, void *data,
                                 size_t vdev_size, const char *vdev_name)
{
    DeviceState *vdev = data;

    object_initialize(vdev, vdev_size, vdev_name);
    object_property_add_child(proxy_obj, "virtio-backend", OBJECT(vdev), NULL);
    object_unref(OBJECT(vdev));
    qdev_alias_all_properties(vdev, proxy_obj);
}

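/*
 * Common initialisation for a concrete virtio device, normally called from
 * its realize callback before any virtio_add_queue() calls; queries the
 * transport for the number of MSI-X vectors and registers the VM run-state
 * handler.
 */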
void virtio_init(VirtIODevice *vdev, const char *name,
                 uint16_t device_id, size_t config_size)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    int i;
    int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;

    if (nvectors) {
        vdev->vector_queues =
            g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
    }

    vdev->device_id = device_id;
    vdev->status = 0;
    atomic_set(&vdev->isr, 0);
    vdev->queue_sel = 0;
    vdev->config_vector = VIRTIO_NO_VECTOR;
    vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
    vdev->vm_running = runstate_is_running();
    vdev->broken = false;
    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
        vdev->vq[i].vdev = vdev;
        vdev->vq[i].queue_index = i;
    }

    vdev->name = name;
    vdev->config_len = config_size;
    if (vdev->config_len) {
        vdev->config = g_malloc0(config_size);
    } else {
        vdev->config = NULL;
    }
    vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
                                                     vdev);
    vdev->device_endian = virtio_default_endian();
    vdev->use_guest_notifier_mask = true;
}

hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}

hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.avail;
}

hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.used;
}

hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
{
    return sizeof(VRingDesc) * vdev->vq[n].vring.num;
}

hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
{
    return offsetof(VRingAvail, ring) +
        sizeof(uint16_t) * vdev->vq[n].vring.num;
}

hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
{
    return offsetof(VRingUsed, ring) +
        sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
}

uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].last_avail_idx;
}

void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
{
    vdev->vq[n].last_avail_idx = idx;
    vdev->vq[n].shadow_avail_idx = idx;
}

void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
{
    vdev->vq[n].signalled_used_valid = false;
}

VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
{
    return vdev->vq + n;
}

uint16_t virtio_get_queue_index(VirtQueue *vq)
{
    return vq->queue_index;
}

static void virtio_queue_guest_notifier_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);
    if (event_notifier_test_and_clear(n)) {
        virtio_notify_vector(vq->vdev, vq->vector);
    }
}

void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
                                                bool with_irqfd)
{
    if (assign && !with_irqfd) {
        event_notifier_set_handler(&vq->guest_notifier, false,
                                   virtio_queue_guest_notifier_read);
    } else {
        event_notifier_set_handler(&vq->guest_notifier, false, NULL);
    }
    if (!assign) {
        /* Test and clear notifier before closing it,
         * in case poll callback didn't have time to run. */
        virtio_queue_guest_notifier_read(&vq->guest_notifier);
    }
}

EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
{
    return &vq->guest_notifier;
}

static void virtio_queue_host_notifier_aio_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
    if (event_notifier_test_and_clear(n)) {
        virtio_queue_notify_aio_vq(vq);
    }
}

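/*
 * Adaptive polling handshake with the AioContext: while the event loop is
 * busy-polling, guest->host notifications are suppressed (poll_begin), the
 * ring is checked directly for new buffers (poll), and notifications are
 * re-enabled once polling stops (poll_end).
 */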
static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);

    virtio_queue_set_notification(vq, 0);
}

static bool virtio_queue_host_notifier_aio_poll(void *opaque)
{
    EventNotifier *n = opaque;
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);

    if (virtio_queue_empty(vq)) {
        return false;
    }

    virtio_queue_notify_aio_vq(vq);
    return true;
}

static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);

    /* Caller polls once more after this to catch requests that race with us */
    virtio_queue_set_notification(vq, 1);
}

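/*
 * Attach or detach the queue's host notifier to an AioContext (for example
 * an IOThread's context).  A non-NULL handler enables fast-path dispatch
 * with polling; a NULL handler tears the binding down and drains any
 * notification that raced with the removal.
 */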
void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
                                                VirtIOHandleOutput handle_output)
{
    if (handle_output) {
        vq->handle_aio_output = handle_output;
        aio_set_event_notifier(ctx, &vq->host_notifier, true,
                               virtio_queue_host_notifier_aio_read,
                               virtio_queue_host_notifier_aio_poll);
        aio_set_event_notifier_poll(ctx, &vq->host_notifier,
                                    virtio_queue_host_notifier_aio_poll_begin,
                                    virtio_queue_host_notifier_aio_poll_end);
    } else {
        aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL);
        /* Test and clear notifier after the handler has been removed,
         * in case the poll callback didn't have time to run. */
        virtio_queue_host_notifier_aio_read(&vq->host_notifier);
        vq->handle_aio_output = NULL;
    }
}

void virtio_queue_host_notifier_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
    if (event_notifier_test_and_clear(n)) {
        virtio_queue_notify_vq(vq);
2128 2129 2130
    }
}

EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
{
    return &vq->host_notifier;
}

void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
{
    g_free(vdev->bus_name);
    vdev->bus_name = g_strdup(bus_name);
}

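/*
 * Mark the device broken after a fatal, guest-triggered error.  Callers pass
 * a printf-style message, e.g. (illustrative call only):
 *
 *     virtio_error(vdev, "vq %u: descriptor chain too long", i);
 *
 * For VIRTIO 1.0 devices this also latches NEEDS_RESET in the status field
 * and raises a configuration change notification.
 */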
void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
{
    va_list ap;

    va_start(ap, fmt);
    error_vreport(fmt, ap);
    va_end(ap);

    vdev->broken = true;

    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        virtio_set_status(vdev, vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET);
        virtio_notify_config(vdev);
    }
}

static void virtio_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
    Error *err = NULL;

    /* Devices should either use vmsd or the load/save methods */
    assert(!vdc->vmsd || !vdc->load);

    if (vdc->realize != NULL) {
        vdc->realize(dev, &err);
        if (err != NULL) {
            error_propagate(errp, err);
            return;
        }
    }

    virtio_bus_device_plugged(vdev, &err);
    if (err != NULL) {
        error_propagate(errp, err);
        return;
    }
}

static void virtio_device_unrealize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
    Error *err = NULL;

    virtio_bus_device_unplugged(vdev);

    if (vdc->unrealize != NULL) {
        vdc->unrealize(dev, &err);
        if (err != NULL) {
            error_propagate(errp, err);
            return;
        }
    }

    g_free(vdev->bus_name);
    vdev->bus_name = NULL;
}

static Property virtio_properties[] = {
    DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
    DEFINE_PROP_END_OF_LIST(),
};

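/*
 * Switch queue processing to ioeventfd: first wire up a host notifier for
 * every in-use queue (rolling back on failure), then kick each notifier
 * once so that requests already sitting in the rings get processed.
 */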
static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
{
    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
    int n, r, err;

    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
        VirtQueue *vq = &vdev->vq[n];
        if (!virtio_queue_get_num(vdev, n)) {
            continue;
        }
        r = virtio_bus_set_host_notifier(qbus, n, true);
        if (r < 0) {
            err = r;
            goto assign_error;
        }
        event_notifier_set_handler(&vq->host_notifier, true,
                                   virtio_queue_host_notifier_read);
    }

    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
        /* Kick right away to begin processing requests already in vring */
        VirtQueue *vq = &vdev->vq[n];
        if (!vq->vring.num) {
            continue;
        }
        event_notifier_set(&vq->host_notifier);
    }
    return 0;

assign_error:
    while (--n >= 0) {
        VirtQueue *vq = &vdev->vq[n];
        if (!virtio_queue_get_num(vdev, n)) {
            continue;
        }

        event_notifier_set_handler(&vq->host_notifier, true, NULL);
        r = virtio_bus_set_host_notifier(qbus, n, false);
        assert(r >= 0);
    }
    return err;
}

int virtio_device_start_ioeventfd(VirtIODevice *vdev)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);

    return virtio_bus_start_ioeventfd(vbus);
}

static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
{
    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
    int n, r;

    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
        VirtQueue *vq = &vdev->vq[n];

        if (!virtio_queue_get_num(vdev, n)) {
            continue;
        }
        event_notifier_set_handler(&vq->host_notifier, true, NULL);
        r = virtio_bus_set_host_notifier(qbus, n, false);
        assert(r >= 0);
    }
}

void virtio_device_stop_ioeventfd(VirtIODevice *vdev)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);

    virtio_bus_stop_ioeventfd(vbus);
}

int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);

    return virtio_bus_grab_ioeventfd(vbus);
}

void virtio_device_release_ioeventfd(VirtIODevice *vdev)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);

    virtio_bus_release_ioeventfd(vbus);
}

static void virtio_device_class_init(ObjectClass *klass, void *data)
{
    /* Set the default value here. */
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->realize = virtio_device_realize;
    dc->unrealize = virtio_device_unrealize;
    dc->bus_type = TYPE_VIRTIO_BUS;
    dc->props = virtio_properties;
    vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
    vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;

    vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
}

bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);

    return virtio_bus_ioeventfd_enabled(vbus);
}

static const TypeInfo virtio_device_info = {
    .name = TYPE_VIRTIO_DEVICE,
    .parent = TYPE_DEVICE,
    .instance_size = sizeof(VirtIODevice),
    .class_init = virtio_device_class_init,
    .abstract = true,
    .class_size = sizeof(VirtioDeviceClass),
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_device_info);
}

type_init(virtio_register_types)