virtio.c 74.0 KB
Newer Older
A
aliguori 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
/*
 * Virtio Support
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

P
Peter Maydell 已提交
14
#include "qemu/osdep.h"
15
#include "qapi/error.h"
16 17
#include "qemu-common.h"
#include "cpu.h"
18
#include "trace.h"
19
#include "exec/address-spaces.h"
20
#include "qemu/error-report.h"
P
Paolo Bonzini 已提交
21
#include "hw/virtio/virtio.h"
22
#include "qemu/atomic.h"
P
Paolo Bonzini 已提交
23
#include "hw/virtio/virtio-bus.h"
24
#include "hw/virtio/virtio-access.h"
J
Jason Wang 已提交
25
#include "sysemu/dma.h"
A
aliguori 已提交
26

27 28 29 30 31
/*
 * The alignment to use between consumer and producer parts of vring.
 * x86 pagesize again. This is the default, used by transports like PCI
 * which don't provide a means for the guest to tell the host the alignment.
 */
#define VIRTIO_PCI_VRING_ALIGN         4096

A
aliguori 已提交
34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61
/* Guest-visible split-virtqueue layout.  These structs mirror guest memory
 * and are only accessed through the endian-converting helpers below, never
 * dereferenced in guest memory directly. */
typedef struct VRingDesc
{
    uint64_t addr;   /* guest-physical address of the buffer */
    uint32_t len;    /* buffer length in bytes */
    uint16_t flags;  /* VRING_DESC_F_* */
    uint16_t next;   /* index of the next descriptor if F_NEXT is set */
} VRingDesc;

typedef struct VRingAvail
{
    uint16_t flags;
    uint16_t idx;
    uint16_t ring[]; /* C99 flexible array member; vring.num entries */
} VRingAvail;

typedef struct VRingUsedElem
{
    uint32_t id;     /* head descriptor index of the completed chain */
    uint32_t len;    /* total bytes written into the chain's in buffers */
} VRingUsedElem;

typedef struct VRingUsed
{
    uint16_t flags;
    uint16_t idx;
    VRingUsedElem ring[]; /* C99 flexible array member; vring.num entries */
} VRingUsed;

62 63 64 65 66 67 68
/* MemoryRegionCaches for the three vring areas.  A whole new set is built
 * and published under RCU whenever the rings move; the old set is freed
 * after a grace period (see virtio_init_region_cache()). */
typedef struct VRingMemoryRegionCaches {
    struct rcu_head rcu;        /* for deferred free via call_rcu() */
    MemoryRegionCache desc;
    MemoryRegionCache avail;
    MemoryRegionCache used;
} VRingMemoryRegionCaches;

A
aliguori 已提交
69 70 71
typedef struct VRing
{
    unsigned int num;          /* current queue size (number of descriptors) */
    unsigned int num_default;  /* size to restore on device reset */
    unsigned int align;        /* used-ring alignment (transport-specific) */
    hwaddr desc;               /* guest-physical address of descriptor table */
    hwaddr avail;              /* guest-physical address of avail ring */
    hwaddr used;               /* guest-physical address of used ring */
    VRingMemoryRegionCaches *caches; /* RCU-protected; see vring_get_region_caches() */
} VRing;

struct VirtQueue
{
    VRing vring;

    /* Next head to pop */
    uint16_t last_avail_idx;

    /* Last avail_idx read from VQ. */
    uint16_t shadow_avail_idx;

    /* Last value written to the used ring's idx field (host-endian copy). */
    uint16_t used_idx;

    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Whether signalled_used is valid (reset when the guest could have
     * observed an intermediate used index; see virtqueue_flush()). */
    bool signalled_used_valid;

    /* Notification enabled? */
    bool notification;

    uint16_t queue_index;

    /* Number of elements popped but not yet returned to the guest. */
    unsigned int inuse;

    uint16_t vector;                       /* MSI-X vector / interrupt line */
    VirtIOHandleOutput handle_output;      /* guest kick handler */
    VirtIOHandleAIOOutput handle_aio_output;
    VirtIODevice *vdev;                    /* owning device */
    EventNotifier guest_notifier;
    EventNotifier host_notifier;
    QLIST_ENTRY(VirtQueue) node;
};

114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132
/* Tear down and free one set of vring region caches; NULL is a no-op.
 * Runs as an RCU callback after readers have drained. */
static void virtio_free_region_cache(VRingMemoryRegionCaches *caches)
{
    if (caches) {
        address_space_cache_destroy(&caches->desc);
        address_space_cache_destroy(&caches->avail);
        address_space_cache_destroy(&caches->used);
        g_free(caches);
    }
}

/* Rebuild the MemoryRegionCaches for queue @n and publish them via RCU.
 * Called whenever the guest moves or resizes the rings.  On any mapping
 * failure the device is marked broken and the partial caches are torn
 * down; the previously published caches (if any) stay in place. */
static void virtio_init_region_cache(VirtIODevice *vdev, int n)
{
    VirtQueue *vq = &vdev->vq[n];
    VRingMemoryRegionCaches *old = vq->vring.caches;
    VRingMemoryRegionCaches *new;
    hwaddr addr, size;
    int event_size;
    int64_t len;

    /* With EVENT_IDX each ring carries one extra 16-bit event field. */
    event_size = virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0;

    addr = vq->vring.desc;
    if (!addr) {
        /* Queue not set up by the guest yet - nothing to cache. */
        return;
    }
    new = g_new0(VRingMemoryRegionCaches, 1);
    size = virtio_queue_get_desc_size(vdev, n);
    len = address_space_cache_init(&new->desc, vdev->dma_as,
                                   addr, size, false);
    if (len < size) {
        virtio_error(vdev, "Cannot map desc");
        goto err_desc;
    }

    size = virtio_queue_get_used_size(vdev, n) + event_size;
    /* used ring is the only one the device writes to */
    len = address_space_cache_init(&new->used, vdev->dma_as,
                                   vq->vring.used, size, true);
    if (len < size) {
        virtio_error(vdev, "Cannot map used");
        goto err_used;
    }

    size = virtio_queue_get_avail_size(vdev, n) + event_size;
    len = address_space_cache_init(&new->avail, vdev->dma_as,
                                   vq->vring.avail, size, false);
    if (len < size) {
        virtio_error(vdev, "Cannot map avail");
        goto err_avail;
    }

    /* Publish the new caches; free the old set after a grace period. */
    atomic_rcu_set(&vq->vring.caches, new);
    if (old) {
        call_rcu(old, virtio_free_region_cache, rcu);
    }
    return;

err_avail:
    address_space_cache_destroy(&new->used);
err_used:
    address_space_cache_destroy(&new->desc);
err_desc:
    g_free(new);
}

A
aliguori 已提交
180
/* virt queue functions */
/* Derive the avail/used ring addresses for queue @n from the descriptor
 * table base (split-ring layout), then refresh the region caches. */
void virtio_queue_update_rings(VirtIODevice *vdev, int n)
{
    VRing *vring = &vdev->vq[n].vring;

    if (!vring->num || !vring->desc || !vring->align) {
        /* not yet setup -> nothing to do */
        return;
    }
    /* Spec layout: avail ring directly follows the descriptor table;
     * used ring follows avail, rounded up to the transport alignment. */
    vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
    vring->used = vring_align(vring->avail +
                              offsetof(VRingAvail, ring[vring->num]),
                              vring->align);
    virtio_init_region_cache(vdev, n);
}

196
/* Called within rcu_read_lock().  */
/* Read descriptor @i from the cached descriptor table into @desc,
 * converting every field from guest to host endianness. */
static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc,
                            MemoryRegionCache *cache, int i)
{
    address_space_read_cached(cache, i * sizeof(VRingDesc),
                              desc, sizeof(VRingDesc));
    virtio_tswap64s(vdev, &desc->addr);
    virtio_tswap32s(vdev, &desc->len);
    virtio_tswap16s(vdev, &desc->flags);
    virtio_tswap16s(vdev, &desc->next);
}

208 209 210 211 212 213
/* Fetch the RCU-protected region caches for @vq; they must already have
 * been published by virtio_init_region_cache(). */
static VRingMemoryRegionCaches *vring_get_region_caches(struct VirtQueue *vq)
{
    VRingMemoryRegionCaches *caches;

    caches = atomic_rcu_read(&vq->vring.caches);
    assert(caches != NULL);
    return caches;
}
214
/* Called within rcu_read_lock().  */
A
aliguori 已提交
215 216
static inline uint16_t vring_avail_flags(VirtQueue *vq)
{
217
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
218 219
    hwaddr pa = offsetof(VRingAvail, flags);
    return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
A
aliguori 已提交
220 221
}

222
/* Called within rcu_read_lock().  */
A
aliguori 已提交
223 224
static inline uint16_t vring_avail_idx(VirtQueue *vq)
{
225
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
226 227
    hwaddr pa = offsetof(VRingAvail, idx);
    vq->shadow_avail_idx = virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
228
    return vq->shadow_avail_idx;
A
aliguori 已提交
229 230
}

231
/* Called within rcu_read_lock().  */
A
aliguori 已提交
232 233
static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
{
234
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
235 236
    hwaddr pa = offsetof(VRingAvail, ring[i]);
    return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa);
A
aliguori 已提交
237 238
}

239
/* Called within rcu_read_lock().  */
240
static inline uint16_t vring_get_used_event(VirtQueue *vq)
M
Michael S. Tsirkin 已提交
241 242 243 244
{
    return vring_avail_ring(vq, vq->vring.num);
}

245
/* Called within rcu_read_lock().  */
/* Store used-ring element @i from @uelem.  Note: @uelem is byte-swapped
 * in place to guest endianness, so the caller's copy is clobbered.  The
 * cache is invalidated so the guest observes the store. */
static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
                                    int i)
{
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
    hwaddr pa = offsetof(VRingUsed, ring[i]);
    virtio_tswap32s(vq->vdev, &uelem->id);
    virtio_tswap32s(vq->vdev, &uelem->len);
    address_space_write_cached(&caches->used, pa, uelem, sizeof(VRingUsedElem));
    address_space_cache_invalidate(&caches->used, pa, sizeof(VRingUsedElem));
}

257
/* Called within rcu_read_lock().  */
A
aliguori 已提交
258 259
static uint16_t vring_used_idx(VirtQueue *vq)
{
260
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
261 262
    hwaddr pa = offsetof(VRingUsed, idx);
    return virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);
A
aliguori 已提交
263 264
}

265
/* Called within rcu_read_lock().  */
M
Michael S. Tsirkin 已提交
266
static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
A
aliguori 已提交
267
{
268
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
269 270 271
    hwaddr pa = offsetof(VRingUsed, idx);
    virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
    address_space_cache_invalidate(&caches->used, pa, sizeof(val));
272
    vq->used_idx = val;
A
aliguori 已提交
273 274
}

275
/* Called within rcu_read_lock().  */
A
aliguori 已提交
276 277
static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
{
278
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
279
    VirtIODevice *vdev = vq->vdev;
280 281 282 283 284
    hwaddr pa = offsetof(VRingUsed, flags);
    uint16_t flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);

    virtio_stw_phys_cached(vdev, &caches->used, pa, flags | mask);
    address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
A
aliguori 已提交
285 286
}

287
/* Called within rcu_read_lock().  */
A
aliguori 已提交
288 289
static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
{
290
    VRingMemoryRegionCaches *caches = vring_get_region_caches(vq);
291
    VirtIODevice *vdev = vq->vdev;
292 293 294 295 296
    hwaddr pa = offsetof(VRingUsed, flags);
    uint16_t flags = virtio_lduw_phys_cached(vq->vdev, &caches->used, pa);

    virtio_stw_phys_cached(vdev, &caches->used, pa, flags & ~mask);
    address_space_cache_invalidate(&caches->used, pa, sizeof(flags));
A
aliguori 已提交
297 298
}

299
/* Called within rcu_read_lock().  */
/* Write the avail_event field (which sits right after the used ring
 * entries) telling the guest at which avail index we want a kick.
 * Skipped entirely while notifications are disabled. */
static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
{
    VRingMemoryRegionCaches *caches;
    hwaddr pa;
    if (!vq->notification) {
        return;
    }

    caches = vring_get_region_caches(vq);
    pa = offsetof(VRingUsed, ring[vq->vring.num]);
    virtio_stw_phys_cached(vq->vdev, &caches->used, pa, val);
    address_space_cache_invalidate(&caches->used, pa, sizeof(val));
}

A
aliguori 已提交
314 315
/* Enable or disable guest->host kick notifications for @vq.  With
 * EVENT_IDX the avail_event field is used; otherwise the
 * VRING_USED_F_NO_NOTIFY flag is toggled. */
void virtio_queue_set_notification(VirtQueue *vq, int enable)
{
    vq->notification = enable;

    if (!vq->vring.desc) {
        /* Rings not set up yet; nothing to write to guest memory. */
        return;
    }

    rcu_read_lock();
    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vring_avail_idx(vq));
    } else if (enable) {
        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
    } else {
        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
    }
    if (enable) {
        /* Expose avail event/used flags before caller checks the avail idx. */
        smp_mb();
    }
    rcu_read_unlock();
}

/* A queue is ready once the guest has published an avail ring address. */
int virtio_queue_ready(VirtQueue *vq)
{
    return vq->vring.avail ? 1 : 0;
}

342
/* Fetch avail_idx from VQ memory only when we really need to know if
 * guest has added some buffers.
 * Called within rcu_read_lock().  */
static int virtio_queue_empty_rcu(VirtQueue *vq)
{
    if (unlikely(!vq->vring.avail)) {
        return 1;
    }

    /* A cached-index mismatch already proves work is pending, so only
     * re-read guest memory when the shadow copy looks empty. */
    return vq->shadow_avail_idx == vq->last_avail_idx &&
           vring_avail_idx(vq) == vq->last_avail_idx;
}

358 359 360 361
/* Like virtio_queue_empty_rcu(), but takes the RCU read lock itself for
 * the guest-memory re-read. */
int virtio_queue_empty(VirtQueue *vq)
{
    bool no_work;

    if (unlikely(!vq->vring.avail)) {
        return 1;
    }

    if (vq->shadow_avail_idx != vq->last_avail_idx) {
        /* Shadow index already shows pending buffers. */
        return 0;
    }

    rcu_read_lock();
    no_work = (vring_avail_idx(vq) == vq->last_avail_idx);
    rcu_read_unlock();
    return no_work;
}

376 377
/* Unmap all iovecs of @elem, crediting the first @len bytes of device
 * writes to the in (device-writable) buffers in order. */
static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
                               unsigned int len)
{
    AddressSpace *as = vq->vdev->dma_as;
    unsigned int written = 0;
    int i;

    for (i = 0; i < elem->in_num; i++) {
        size_t chunk = MIN(len - written, elem->in_sg[i].iov_len);

        dma_memory_unmap(as, elem->in_sg[i].iov_base,
                         elem->in_sg[i].iov_len,
                         DMA_DIRECTION_FROM_DEVICE, chunk);
        written += chunk;
    }

    for (i = 0; i < elem->out_num; i++) {
        /* out buffers were only read by the device: nothing written back */
        dma_memory_unmap(as, elem->out_sg[i].iov_base,
                         elem->out_sg[i].iov_len,
                         DMA_DIRECTION_TO_DEVICE,
                         elem->out_sg[i].iov_len);
    }
}

401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416
/* virtqueue_detach_element:
 * @vq: The #VirtQueue
 * @elem: The #VirtQueueElement
 * @len: number of bytes written
 *
 * Drop an element without completing it: its buffers are unmapped and the
 * queue's in-use count released.  Intended for device reset and similar
 * paths where the element is simply freed, never pushed or discarded.
 */
void virtqueue_detach_element(VirtQueue *vq, const VirtQueueElement *elem,
                              unsigned int len)
{
    vq->inuse--;
    virtqueue_unmap_sg(vq, elem, len);
}

417
/* virtqueue_unpop:
418 419 420 421 422 423 424
 * @vq: The #VirtQueue
 * @elem: The #VirtQueueElement
 * @len: number of bytes written
 *
 * Pretend the most recent element wasn't popped from the virtqueue.  The next
 * call to virtqueue_pop() will refetch the element.
 */
425 426
void virtqueue_unpop(VirtQueue *vq, const VirtQueueElement *elem,
                     unsigned int len)
J
Jason Wang 已提交
427 428
{
    vq->last_avail_idx--;
429
    virtqueue_detach_element(vq, elem, len);
J
Jason Wang 已提交
430 431
}

S
Stefan Hajnoczi 已提交
432 433 434 435 436 437 438
/* virtqueue_rewind:
 * @vq: The #VirtQueue
 * @num: Number of elements to push back
 *
 * Pretend that elements weren't popped from the virtqueue.  The next
 * virtqueue_pop() will refetch the oldest element.
 *
 * Use virtqueue_unpop() instead if you have a VirtQueueElement.
 *
 * Returns: true on success, false if @num is greater than the number of in use
 * elements.
 */
bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
{
    bool ok = (num <= vq->inuse);

    if (ok) {
        vq->last_avail_idx -= num;
        vq->inuse -= num;
    }
    return ok;
}

454
/* Called within rcu_read_lock().  */
/* Record @elem as completed with @len bytes written, at offset @idx past
 * the current used index.  The used index itself is only advanced by a
 * subsequent virtqueue_flush(). */
void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len, unsigned int idx)
{
    VRingUsedElem uelem;

    trace_virtqueue_fill(vq, elem, len, idx);

    virtqueue_unmap_sg(vq, elem, len);

    if (unlikely(vq->vdev->broken)) {
        /* Broken device: unmap only, never touch guest ring memory. */
        return;
    }

    if (unlikely(!vq->vring.used)) {
        return;
    }

    idx = (idx + vq->used_idx) % vq->vring.num;

    uelem.id = elem->index;
    uelem.len = len;
    vring_used_write(vq, &uelem, idx);
}

479
/* Called within rcu_read_lock().  */
/* Advance the used index by @count elements previously recorded with
 * virtqueue_fill(), making them visible to the guest. */
void virtqueue_flush(VirtQueue *vq, unsigned int count)
{
    uint16_t old, new;

    if (unlikely(vq->vdev->broken)) {
        /* Still release the in-use slots even when broken. */
        vq->inuse -= count;
        return;
    }

    if (unlikely(!vq->vring.used)) {
        return;
    }

    /* Make sure buffer is written before we update index. */
    smp_wmb();
    trace_virtqueue_flush(vq, count);
    old = vq->used_idx;
    new = old + count;
    vring_used_idx_set(vq, new);
    vq->inuse -= count;
    /* If the index may have wrapped past the last value we signalled on,
     * the cached signalled_used can no longer be used for interrupt
     * suppression (16-bit modular comparison). */
    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old)))
        vq->signalled_used_valid = false;
}

/* Complete a single element: fill slot 0 and flush it, under the RCU
 * read lock required by the vring accessors. */
void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len)
{
    rcu_read_lock();
    virtqueue_fill(vq, elem, len, 0);
    virtqueue_flush(vq, 1);
    rcu_read_unlock();
}

513
/* Called within rcu_read_lock().  */
/* Number of heads the guest has made available beyond @idx.  Returns
 * -EINVAL (after marking the device broken) if the guest published an
 * impossible avail index. */
static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
{
    uint16_t num_heads = vring_avail_idx(vq) - idx;

    /* Check it isn't doing very strange things with descriptor numbers. */
    if (num_heads > vq->vring.num) {
        virtio_error(vq->vdev, "Guest moved used index from %u to %u",
                     idx, vq->shadow_avail_idx);
        return -EINVAL;
    }
    /* On success, callers read a descriptor at vq->last_avail_idx.
     * Make sure descriptor read does not bypass avail index read. */
    if (num_heads) {
        smp_rmb();
    }

    return num_heads;
}

533
/* Called within rcu_read_lock().  */
534 535
static bool virtqueue_get_head(VirtQueue *vq, unsigned int idx,
                               unsigned int *head)
A
aliguori 已提交
536 537 538
{
    /* Grab the next descriptor number they're advertising, and increment
     * the index we've seen. */
539
    *head = vring_avail_ring(vq, idx % vq->vring.num);
A
aliguori 已提交
540 541

    /* If their number is silly, that's a fatal mistake. */
542 543 544
    if (*head >= vq->vring.num) {
        virtio_error(vq->vdev, "Guest says index %u is available", *head);
        return false;
A
aliguori 已提交
545
    }
A
aliguori 已提交
546

547
    return true;
A
aliguori 已提交
548 549
}

550 551 552 553 554
/* Result codes for virtqueue_read_next_desc(). */
enum {
    VIRTQUEUE_READ_DESC_ERROR = -1,
    VIRTQUEUE_READ_DESC_DONE = 0,   /* end of chain */
    VIRTQUEUE_READ_DESC_MORE = 1,   /* more buffers in chain */
};
A
aliguori 已提交
555

556
/* Follow a descriptor chain: if @desc chains, load its successor (bounds
 * checked against @max) into @desc and @next.  Returns one of the
 * VIRTQUEUE_READ_DESC_* codes above; marks the device broken on error. */
static int virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
                                    MemoryRegionCache *desc_cache, unsigned int max,
                                    unsigned int *next)
{
    /* If this descriptor says it doesn't chain, we're done. */
    if (!(desc->flags & VRING_DESC_F_NEXT)) {
        return VIRTQUEUE_READ_DESC_DONE;
    }

    /* Check they're not leading us off end of descriptors. */
    *next = desc->next;
    /* Make sure compiler knows to grab that: we don't want it changing! */
    smp_wmb();

    if (*next >= max) {
        virtio_error(vdev, "Desc next is %u", *next);
        return VIRTQUEUE_READ_DESC_ERROR;
    }

    vring_desc_read(vdev, desc, desc_cache, *next);
    return VIRTQUEUE_READ_DESC_MORE;
}

579
/* Sum the device-writable (*@in_bytes) and device-readable (*@out_bytes)
 * buffer space currently available on @vq, walking every pending chain.
 * Stops counting early once both @max_in_bytes and @max_out_bytes are
 * reached.  On guest error the device is marked broken and both totals
 * are reported as 0.
 *
 * Fix vs. previous revision: @max (the descriptor-count bound used for
 * loop detection and 'next' validation) was clobbered by an indirect
 * table's size and never restored, so chains examined after an indirect
 * chain were checked against the wrong bound.  It is now reset to
 * vq->vring.num at the start of every head. */
void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
                               unsigned int *out_bytes,
                               unsigned max_in_bytes, unsigned max_out_bytes)
{
    VirtIODevice *vdev = vq->vdev;
    unsigned int max, idx;
    unsigned int total_bufs, in_total, out_total;
    VRingMemoryRegionCaches *caches;
    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
    int64_t len = 0;
    int rc;

    if (unlikely(!vq->vring.desc)) {
        if (in_bytes) {
            *in_bytes = 0;
        }
        if (out_bytes) {
            *out_bytes = 0;
        }
        return;
    }

    rcu_read_lock();
    idx = vq->last_avail_idx;
    total_bufs = in_total = out_total = 0;

    max = vq->vring.num;
    caches = vring_get_region_caches(vq);
    if (caches->desc.len < max * sizeof(VRingDesc)) {
        virtio_error(vdev, "Cannot map descriptor ring");
        goto err;
    }

    while ((rc = virtqueue_num_heads(vq, idx)) > 0) {
        MemoryRegionCache *desc_cache = &caches->desc;
        unsigned int num_bufs;
        VRingDesc desc;
        unsigned int i;

        /* Reset the bound for each head: a previous iteration may have
         * left @max set to an indirect table's descriptor count. */
        max = vq->vring.num;
        num_bufs = total_bufs;

        if (!virtqueue_get_head(vq, idx++, &i)) {
            goto err;
        }

        vring_desc_read(vdev, &desc, desc_cache, i);

        if (desc.flags & VRING_DESC_F_INDIRECT) {
            if (desc.len % sizeof(VRingDesc)) {
                virtio_error(vdev, "Invalid size for indirect buffer table");
                goto err;
            }

            /* If we've got too many, that implies a descriptor loop. */
            if (num_bufs >= max) {
                virtio_error(vdev, "Looped descriptor");
                goto err;
            }

            /* loop over the indirect descriptor table */
            len = address_space_cache_init(&indirect_desc_cache,
                                           vdev->dma_as,
                                           desc.addr, desc.len, false);
            desc_cache = &indirect_desc_cache;
            if (len < desc.len) {
                virtio_error(vdev, "Cannot map indirect buffer");
                goto err;
            }

            max = desc.len / sizeof(VRingDesc);
            num_bufs = i = 0;
            vring_desc_read(vdev, &desc, desc_cache, i);
        }

        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > max) {
                virtio_error(vdev, "Looped descriptor");
                goto err;
            }

            if (desc.flags & VRING_DESC_F_WRITE) {
                in_total += desc.len;
            } else {
                out_total += desc.len;
            }
            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
                goto done;
            }

            rc = virtqueue_read_next_desc(vdev, &desc, desc_cache, max, &i);
        } while (rc == VIRTQUEUE_READ_DESC_MORE);

        if (rc == VIRTQUEUE_READ_DESC_ERROR) {
            goto err;
        }

        if (desc_cache == &indirect_desc_cache) {
            address_space_cache_destroy(&indirect_desc_cache);
            /* an indirect chain counts as a single buffer */
            total_bufs++;
        } else {
            total_bufs = num_bufs;
        }
    }

    if (rc < 0) {
        goto err;
    }

done:
    address_space_cache_destroy(&indirect_desc_cache);
    if (in_bytes) {
        *in_bytes = in_total;
    }
    if (out_bytes) {
        *out_bytes = out_total;
    }
    rcu_read_unlock();
    return;

err:
    in_total = out_total = 0;
    goto done;
}
A
aliguori 已提交
703

704 705 706 707 708
/* Return nonzero iff @vq currently offers at least @in_bytes of
 * device-writable and @out_bytes of device-readable buffer space. */
int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
                          unsigned int out_bytes)
{
    unsigned int avail_in, avail_out;

    virtqueue_get_avail_bytes(vq, &avail_in, &avail_out, in_bytes, out_bytes);
    return (in_bytes <= avail_in) && (out_bytes <= avail_out);
}

713 714
/* Map one descriptor's buffer [@pa, @pa+@sz) into host iovecs, appending
 * to @iov/@addr starting at *@p_num_sg (updated on return, including on
 * partial failure so the caller can unmap what was mapped).  A single
 * buffer may need several iovecs if it crosses memory regions.  Returns
 * false and marks the device broken on error. */
static bool virtqueue_map_desc(VirtIODevice *vdev, unsigned int *p_num_sg,
                               hwaddr *addr, struct iovec *iov,
                               unsigned int max_num_sg, bool is_write,
                               hwaddr pa, size_t sz)
{
    bool ok = false;
    unsigned num_sg = *p_num_sg;
    assert(num_sg <= max_num_sg);

    if (!sz) {
        virtio_error(vdev, "virtio: zero sized buffers are not allowed");
        goto out;
    }

    while (sz) {
        hwaddr len = sz;

        if (num_sg == max_num_sg) {
            virtio_error(vdev, "virtio: too many write descriptors in "
                               "indirect table");
            goto out;
        }

        /* dma_memory_map may shorten len to the contiguous mapping size */
        iov[num_sg].iov_base = dma_memory_map(vdev->dma_as, pa, &len,
                                              is_write ?
                                              DMA_DIRECTION_FROM_DEVICE :
                                              DMA_DIRECTION_TO_DEVICE);
        if (!iov[num_sg].iov_base) {
            virtio_error(vdev, "virtio: bogus descriptor or out of resources");
            goto out;
        }

        iov[num_sg].iov_len = len;
        addr[num_sg] = pa;

        sz -= len;
        pa += len;
        num_sg++;
    }
    ok = true;

out:
    *p_num_sg = num_sg;
    return ok;
}

/* Only used by error code paths before we have a VirtQueueElement (therefore
 * virtqueue_unmap_sg() can't be used).  Assumes buffers weren't written to
 * yet.
 */
static void virtqueue_undo_map_desc(unsigned int out_num, unsigned int in_num,
                                    struct iovec *iov)
{
    unsigned int i, total = out_num + in_num;

    for (i = 0; i < total; i++) {
        /* readable (out) iovecs come first, writable (in) ones after */
        cpu_physical_memory_unmap(iov[i].iov_base, iov[i].iov_len,
                                  i >= out_num, 0);
    }
}

J
Jason Wang 已提交
776 777
/* Re-map an element's iovecs from their recorded guest addresses, e.g.
 * after migration.  Each iovec must map contiguously; any failure is
 * fatal (exit), since the element was valid before. */
static void virtqueue_map_iovec(VirtIODevice *vdev, struct iovec *sg,
                                hwaddr *addr, unsigned int *num_sg,
                                int is_write)
{
    unsigned int i;
    hwaddr len;

    for (i = 0; i < *num_sg; i++) {
        len = sg[i].iov_len;
        sg[i].iov_base = dma_memory_map(vdev->dma_as,
                                        addr[i], &len, is_write ?
                                        DMA_DIRECTION_FROM_DEVICE :
                                        DMA_DIRECTION_TO_DEVICE);
        if (!sg[i].iov_base) {
            error_report("virtio: error trying to map MMIO memory");
            exit(1);
        }
        if (len != sg[i].iov_len) {
            /* the original mapping was contiguous, so this must be too */
            error_report("virtio: unexpected memory split");
            exit(1);
        }
    }
}

J
Jason Wang 已提交
800
/* Re-map both scatter-gather lists of @elem (in = device-writable,
 * out = device-readable) from their saved guest addresses. */
void virtqueue_map(VirtIODevice *vdev, VirtQueueElement *elem)
{
    virtqueue_map_iovec(vdev, elem->in_sg, elem->in_addr, &elem->in_num, 1);
    virtqueue_map_iovec(vdev, elem->out_sg, elem->out_addr, &elem->out_num, 0);
}

806
/* Allocate a VirtQueueElement (or a @sz-byte device-specific struct that
 * embeds one at offset 0) together with its four variable-length arrays
 * in a single allocation, with each array suitably aligned. */
static void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
{
    VirtQueueElement *elem;
    /* addr arrays first (aligned for hwaddr), then the iovec arrays */
    size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
    size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
    size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
    size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
    size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
    size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);

    assert(sz >= sizeof(VirtQueueElement));
    elem = g_malloc(out_sg_end);
    trace_virtqueue_alloc_element(elem, sz, in_num, out_num);
    elem->out_num = out_num;
    elem->in_num = in_num;
    elem->in_addr = (void *)elem + in_addr_ofs;
    elem->out_addr = (void *)elem + out_addr_ofs;
    elem->in_sg = (void *)elem + in_sg_ofs;
    elem->out_sg = (void *)elem + out_sg_ofs;
    return elem;
}

828
/* Pop the next available element from @vq: walk its descriptor chain
 * (direct or indirect), map every buffer, and return a freshly allocated
 * element of at least @sz bytes (see virtqueue_alloc_element()).  Returns
 * NULL if the queue is empty or the device is/becomes broken; on guest
 * errors the device is marked broken and partial mappings are undone. */
void *virtqueue_pop(VirtQueue *vq, size_t sz)
{
    unsigned int i, head, max;
    VRingMemoryRegionCaches *caches;
    MemoryRegionCache indirect_desc_cache = MEMORY_REGION_CACHE_INVALID;
    MemoryRegionCache *desc_cache;
    int64_t len;
    VirtIODevice *vdev = vq->vdev;
    VirtQueueElement *elem = NULL;
    unsigned out_num, in_num, elem_entries;
    hwaddr addr[VIRTQUEUE_MAX_SIZE];
    struct iovec iov[VIRTQUEUE_MAX_SIZE];
    VRingDesc desc;
    int rc;

    if (unlikely(vdev->broken)) {
        return NULL;
    }
    rcu_read_lock();
    if (virtio_queue_empty_rcu(vq)) {
        goto done;
    }
    /* Needed after virtio_queue_empty(), see comment in
     * virtqueue_num_heads(). */
    smp_rmb();

    /* When we start there are none of either input nor output. */
    out_num = in_num = elem_entries = 0;

    max = vq->vring.num;

    if (vq->inuse >= vq->vring.num) {
        virtio_error(vdev, "Virtqueue size exceeded");
        goto done;
    }

    if (!virtqueue_get_head(vq, vq->last_avail_idx++, &head)) {
        goto done;
    }

    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        /* ask for a kick once the guest adds the next buffer */
        vring_set_avail_event(vq, vq->last_avail_idx);
    }

    i = head;

    caches = vring_get_region_caches(vq);
    if (caches->desc.len < max * sizeof(VRingDesc)) {
        virtio_error(vdev, "Cannot map descriptor ring");
        goto done;
    }

    desc_cache = &caches->desc;
    vring_desc_read(vdev, &desc, desc_cache, i);
    if (desc.flags & VRING_DESC_F_INDIRECT) {
        if (desc.len % sizeof(VRingDesc)) {
            virtio_error(vdev, "Invalid size for indirect buffer table");
            goto done;
        }

        /* loop over the indirect descriptor table */
        len = address_space_cache_init(&indirect_desc_cache, vdev->dma_as,
                                       desc.addr, desc.len, false);
        desc_cache = &indirect_desc_cache;
        if (len < desc.len) {
            virtio_error(vdev, "Cannot map indirect buffer");
            goto done;
        }

        max = desc.len / sizeof(VRingDesc);
        i = 0;
        vring_desc_read(vdev, &desc, desc_cache, i);
    }

    /* Collect all the descriptors */
    do {
        bool map_ok;

        if (desc.flags & VRING_DESC_F_WRITE) {
            map_ok = virtqueue_map_desc(vdev, &in_num, addr + out_num,
                                        iov + out_num,
                                        VIRTQUEUE_MAX_SIZE - out_num, true,
                                        desc.addr, desc.len);
        } else {
            /* the spec requires all readable descriptors before writable ones */
            if (in_num) {
                virtio_error(vdev, "Incorrect order for descriptors");
                goto err_undo_map;
            }
            map_ok = virtqueue_map_desc(vdev, &out_num, addr, iov,
                                        VIRTQUEUE_MAX_SIZE, false,
                                        desc.addr, desc.len);
        }
        if (!map_ok) {
            goto err_undo_map;
        }

        /* If we've got too many, that implies a descriptor loop. */
        if (++elem_entries > max) {
            virtio_error(vdev, "Looped descriptor");
            goto err_undo_map;
        }

        rc = virtqueue_read_next_desc(vdev, &desc, desc_cache, max, &i);
    } while (rc == VIRTQUEUE_READ_DESC_MORE);

    if (rc == VIRTQUEUE_READ_DESC_ERROR) {
        goto err_undo_map;
    }

    /* Now copy what we have collected and mapped */
    elem = virtqueue_alloc_element(sz, out_num, in_num);
    elem->index = head;
    for (i = 0; i < out_num; i++) {
        elem->out_addr[i] = addr[i];
        elem->out_sg[i] = iov[i];
    }
    for (i = 0; i < in_num; i++) {
        elem->in_addr[i] = addr[out_num + i];
        elem->in_sg[i] = iov[out_num + i];
    }

    vq->inuse++;

    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
done:
    address_space_cache_destroy(&indirect_desc_cache);
    rcu_read_unlock();

    return elem;

err_undo_map:
    virtqueue_undo_map_desc(out_num, in_num, iov);
    goto done;
}

963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000
/* virtqueue_drop_all:
 * @vq: The #VirtQueue
 * Drops all queued buffers and indicates them to the guest
 * as if they are done. Useful when buffers can not be
 * processed but must be returned to the guest.
 *
 * Returns: the number of buffers dropped.
 */
unsigned int virtqueue_drop_all(VirtQueue *vq)
{
    unsigned int dropped = 0;
    VirtQueueElement elem = {};
    VirtIODevice *vdev = vq->vdev;
    bool fEventIdx = virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);

    if (unlikely(vdev->broken)) {
        return 0;
    }

    while (!virtio_queue_empty(vq) && vq->inuse < vq->vring.num) {
        /* works similar to virtqueue_pop but does not map buffers
        * and does not allocate any memory */
        smp_rmb();
        if (!virtqueue_get_head(vq, vq->last_avail_idx, &elem.index)) {
            break;
        }
        vq->inuse++;
        vq->last_avail_idx++;
        if (fEventIdx) {
            vring_set_avail_event(vq, vq->last_avail_idx);
        }
        /* immediately push the element, nothing to unmap
         * as both in_num and out_num are set to 0 */
        virtqueue_push(vq, &elem, 0);
        dropped++;
    }

    return dropped;
}

1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017
/* Reading and writing a structure directly to QEMUFile is *awful*, but
 * it is what QEMU has always done by mistake.  We can change it sooner
 * or later by bumping the version number of the affected vm states.
 * In the meanwhile, since the in-memory layout of VirtQueueElement
 * has changed, we need to marshal to and from the layout that was
 * used before the change.
 */
typedef struct VirtQueueElementOld {
    unsigned int index;                      /* head descriptor index */
    unsigned int out_num;                    /* valid entries in out_addr/out_sg */
    unsigned int in_num;                     /* valid entries in in_addr/in_sg */
    hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
    hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
    struct iovec in_sg[VIRTQUEUE_MAX_SIZE];  /* iov_base is not migrated */
    struct iovec out_sg[VIRTQUEUE_MAX_SIZE]; /* iov_base is not migrated */
} VirtQueueElementOld;

J
Jason Wang 已提交
1018
/* Load a VirtQueueElement that was saved with qemu_put_virtqueue_element().
 *
 * The element is unmarshalled from the legacy VirtQueueElementOld wire
 * layout, a fresh element of @sz bytes is allocated, and the scatter/gather
 * entries are re-mapped into this process with virtqueue_map().
 */
void *qemu_get_virtqueue_element(VirtIODevice *vdev, QEMUFile *f, size_t sz)
{
    VirtQueueElement *elem;
    VirtQueueElementOld data;
    int i;

    qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));

    /* TODO: teach all callers that this can fail, and return failure instead
     * of asserting here.
     * This is just one thing (there are probably more) that must be
     * fixed before we can allow NDEBUG compilation.
     */
    assert(ARRAY_SIZE(data.in_addr) >= data.in_num);
    assert(ARRAY_SIZE(data.out_addr) >= data.out_num);

    elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
    elem->index = data.index;

    for (i = 0; i < elem->in_num; i++) {
        elem->in_addr[i] = data.in_addr[i];
        /* Base is overwritten by virtqueue_map.  */
        elem->in_sg[i].iov_base = 0;
        elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
    }

    for (i = 0; i < elem->out_num; i++) {
        elem->out_addr[i] = data.out_addr[i];
        /* Base is overwritten by virtqueue_map.  */
        elem->out_sg[i].iov_base = 0;
        elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
    }

    virtqueue_map(vdev, elem);
    return elem;
}

/* Save @elem to @f in the legacy VirtQueueElementOld wire layout. */
void qemu_put_virtqueue_element(QEMUFile *f, VirtQueueElement *elem)
{
    VirtQueueElementOld data;
    int i;

    memset(&data, 0, sizeof(data));
    data.index = elem->index;
    data.in_num = elem->in_num;
    data.out_num = elem->out_num;

    for (i = 0; i < elem->in_num; i++) {
        data.in_addr[i] = elem->in_addr[i];
        /* Do not save iov_base: it is rewritten by virtqueue_map() when
         * loading, and saving it would leak the QEMU address space layout. */
        data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
    }

    for (i = 0; i < elem->out_num; i++) {
        data.out_addr[i] = elem->out_addr[i];
        /* Do not save iov_base as above.  */
        data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
    }

    qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
}

A
aliguori 已提交
1092
/* virtio device */
1093 1094
/* Deliver an interrupt for @vector through the transport's notify hook,
 * unless the device has been marked broken. */
static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    if (unlikely(vdev->broken) || !k->notify) {
        return;
    }

    k->notify(qbus->parent, vector);
}
A
aliguori 已提交
1106

P
Paul Brook 已提交
1107
/* Notify the transport for @vdev with no specific MSI-X vector
 * (VIRTIO_NO_VECTOR). */
void virtio_update_irq(VirtIODevice *vdev)
{
    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
}

1112 1113 1114 1115
/* Validate the features acknowledged by the guest.
 *
 * Returns 0 on success, -EFAULT if the host offered
 * VIRTIO_F_IOMMU_PLATFORM but the guest did not accept it, or the
 * device-specific validate_features() result when implemented.
 */
static int virtio_validate_features(VirtIODevice *vdev)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    if (virtio_host_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM) &&
        !virtio_vdev_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM)) {
        return -EFAULT;
    }

    if (!k->validate_features) {
        return 0;
    }

    return k->validate_features(vdev);
}

/* Update the device status byte.
 *
 * For virtio-1 devices, features are validated on the 0->1 transition
 * of FEATURES_OK; a validation failure is returned to the caller and
 * the status write is not applied.  Returns 0 on success.
 */
int virtio_set_status(VirtIODevice *vdev, uint8_t val)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    trace_virtio_set_status(vdev, val);

    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        !(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
        (val & VIRTIO_CONFIG_S_FEATURES_OK)) {
        int ret = virtio_validate_features(vdev);

        if (ret) {
            return ret;
        }
    }

    if (k->set_status) {
        k->set_status(vdev, val);
    }
    vdev->status = val;

    return 0;
}

1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170
bool target_words_bigendian(void);
/* Default device endianness: follows the target's word order. */
static enum virtio_device_endian virtio_default_endian(void)
{
    return target_words_bigendian() ? VIRTIO_DEVICE_ENDIAN_BIG
                                    : VIRTIO_DEVICE_ENDIAN_LITTLE;
}

/* Endianness of the CPU currently executing (requires current_cpu to be
 * valid, i.e. a guest-initiated access). */
static enum virtio_device_endian virtio_current_cpu_endian(void)
{
    CPUClass *cc = CPU_GET_CLASS(current_cpu);

    return cc->virtio_is_big_endian(current_cpu) ? VIRTIO_DEVICE_ENDIAN_BIG
                                                 : VIRTIO_DEVICE_ENDIAN_LITTLE;
}

1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181
/* Detach the vring's MemoryRegionCaches from @vq and reclaim them after a
 * grace period.  Readers access vq->vring.caches under RCU, so the old
 * caches must be freed via call_rcu() rather than immediately. */
static void virtio_virtqueue_reset_region_cache(struct VirtQueue *vq)
{
    VRingMemoryRegionCaches *caches;

    caches = atomic_read(&vq->vring.caches);
    /* Publish NULL first so new readers stop using the old caches. */
    atomic_rcu_set(&vq->vring.caches, NULL);
    if (caches) {
        call_rcu(caches, virtio_free_region_cache, rcu);
    }
}

P
Paul Brook 已提交
1182
/* Reset @vdev (a VirtIODevice, passed as void* so this can be registered
 * as a qemu reset handler) to its pristine post-boot state: status,
 * features, ISR, config vector and every virtqueue. */
void virtio_reset(void *opaque)
{
    VirtIODevice *vdev = opaque;
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    int i;

    virtio_set_status(vdev, 0);
    if (current_cpu) {
        /* Guest initiated reset */
        vdev->device_endian = virtio_current_cpu_endian();
    } else {
        /* System reset */
        vdev->device_endian = virtio_default_endian();
    }

    /* Give the device-specific implementation a chance to reset first. */
    if (k->reset) {
        k->reset(vdev);
    }

    vdev->broken = false;
    vdev->guest_features = 0;
    vdev->queue_sel = 0;
    vdev->status = 0;
    atomic_set(&vdev->isr, 0);
    vdev->config_vector = VIRTIO_NO_VECTOR;
    virtio_notify_vector(vdev, vdev->config_vector);

    /* Clear all per-queue state; ring sizes go back to their defaults and
     * the cached ring mappings are dropped. */
    for(i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        vdev->vq[i].vring.desc = 0;
        vdev->vq[i].vring.avail = 0;
        vdev->vq[i].vring.used = 0;
        vdev->vq[i].last_avail_idx = 0;
        vdev->vq[i].shadow_avail_idx = 0;
        vdev->vq[i].used_idx = 0;
        virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
        vdev->vq[i].signalled_used = 0;
        vdev->vq[i].signalled_used_valid = false;
        vdev->vq[i].notification = true;
        vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
        vdev->vq[i].inuse = 0;
        virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
    }
}

P
Paul Brook 已提交
1226
/* Legacy (virtio 0.9) config space read, 1 byte, target-endian.
 * Reads beyond config_len return all-ones. */
uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    if (addr + sizeof(uint8_t) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    return ldub_p(vdev->config + addr);
}

P
Paul Brook 已提交
1241
/* Legacy (virtio 0.9) config space read, 2 bytes, target-endian.
 * Reads beyond config_len return all-ones. */
uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    if (addr + sizeof(uint16_t) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    return lduw_p(vdev->config + addr);
}

P
Paul Brook 已提交
1256
/* Legacy (virtio 0.9) config space read, 4 bytes, target-endian.
 * Reads beyond config_len return all-ones. */
uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    if (addr + sizeof(uint32_t) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    return ldl_p(vdev->config + addr);
}

P
Paul Brook 已提交
1271
/* Legacy (virtio 0.9) config space write, 1 byte, target-endian.
 * Writes beyond config_len are silently ignored. */
void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    if (addr + sizeof(uint8_t) > vdev->config_len) {
        return;
    }

    stb_p(vdev->config + addr, (uint8_t)data);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

P
Paul Brook 已提交
1287
/* Legacy (virtio 0.9) config space write, 2 bytes, target-endian.
 * Writes beyond config_len are silently ignored. */
void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    if (addr + sizeof(uint16_t) > vdev->config_len) {
        return;
    }

    stw_p(vdev->config + addr, (uint16_t)data);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

P
Paul Brook 已提交
1303
/* Legacy (virtio 0.9) config space write, 4 bytes, target-endian.
 * Writes beyond config_len are silently ignored. */
void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    if (addr + sizeof(uint32_t) > vdev->config_len) {
        return;
    }

    stl_p(vdev->config + addr, data);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414
/* Modern (virtio 1.0) config space read, 1 byte.
 * Reads beyond config_len return all-ones. */
uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    if (addr + sizeof(uint8_t) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    return ldub_p(vdev->config + addr);
}

/* Modern (virtio 1.0) config space read, 2 bytes, little-endian.
 * Reads beyond config_len return all-ones. */
uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    if (addr + sizeof(uint16_t) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    return lduw_le_p(vdev->config + addr);
}

/* Modern (virtio 1.0) config space read, 4 bytes, little-endian.
 * Reads beyond config_len return all-ones. */
uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    if (addr + sizeof(uint32_t) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    return ldl_le_p(vdev->config + addr);
}

/* Modern (virtio 1.0) config space write, 1 byte.
 * Writes beyond config_len are silently ignored. */
void virtio_config_modern_writeb(VirtIODevice *vdev,
                                 uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    if (addr + sizeof(uint8_t) > vdev->config_len) {
        return;
    }

    stb_p(vdev->config + addr, (uint8_t)data);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

/* Modern (virtio 1.0) config space write, 2 bytes, little-endian.
 * Writes beyond config_len are silently ignored. */
void virtio_config_modern_writew(VirtIODevice *vdev,
                                 uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    if (addr + sizeof(uint16_t) > vdev->config_len) {
        return;
    }

    stw_le_p(vdev->config + addr, (uint16_t)data);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

/* Modern (virtio 1.0) config space write, 4 bytes, little-endian.
 * Writes beyond config_len are silently ignored. */
void virtio_config_modern_writel(VirtIODevice *vdev,
                                 uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    if (addr + sizeof(uint32_t) > vdev->config_len) {
        return;
    }

    stl_le_p(vdev->config + addr, data);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

A
Avi Kivity 已提交
1415
/* Legacy interface: the transport supplies only the descriptor table
 * base; avail/used are derived by virtio_queue_update_rings().
 * Ignored for nonexistent queues (vring.num == 0). */
void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
{
    VRing *vring = &vdev->vq[n].vring;

    if (vring->num) {
        vring->desc = addr;
        virtio_queue_update_rings(vdev, n);
    }
}

A
Avi Kivity 已提交
1424
/* Return the guest physical address of queue @n's descriptor table. */
hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}

/* Modern interface: the transport supplies all three ring addresses
 * explicitly.  Ignored for nonexistent queues (vring.num == 0). */
void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
                            hwaddr avail, hwaddr used)
{
    VRing *vring = &vdev->vq[n].vring;

    if (!vring->num) {
        return;
    }
    vring->desc = desc;
    vring->avail = avail;
    vring->used = used;
    virtio_init_region_cache(vdev, n);
}

1441 1442
/* Guest-driven queue resize.
 *
 * Don't allow the guest to flip a queue between existent and
 * nonexistent states, or to set an invalid size.
 */
void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
{
    bool exists = !!vdev->vq[n].vring.num;

    if (!!num != exists || num < 0 || num > VIRTQUEUE_MAX_SIZE) {
        return;
    }

    vdev->vq[n].vring.num = num;
}

1454 1455 1456 1457 1458 1459 1460 1461 1462 1463
/* First queue on the per-vector list for MSI-X @vector. */
VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
{
    return QLIST_FIRST(&vdev->vector_queues[vector]);
}

/* Successor of @vq on the same per-vector list. */
VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
{
    return QLIST_NEXT(vq, node);
}

/* Current ring size of queue @n; 0 means the queue does not exist. */
int virtio_queue_get_num(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.num;
}
A
aliguori 已提交
1468

M
Michael S. Tsirkin 已提交
1469 1470 1471 1472 1473
/* Default (maximum advertised) ring size of queue @n, as set at creation. */
int virtio_queue_get_max_num(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.num_default;
}

1474 1475 1476 1477
/* Number of queues in use: virtqueues are allocated contiguously, so the
 * first queue with size 0 marks the end. */
int virtio_get_num_queues(VirtIODevice *vdev)
{
    int n = 0;

    while (n < VIRTIO_QUEUE_MAX && virtio_queue_get_num(vdev, n)) {
        n++;
    }

    return n;
}

1487 1488 1489 1490 1491
/* Change queue @n's vring alignment (legacy transports only) and
 * recompute the derived ring addresses.  A zero @align is ignored. */
void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    /* virtio-1 compliant devices cannot change the alignment */
    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        error_report("tried to modify queue alignment for virtio-1 device");
        return;
    }

    /* Check that the transport told us it was going to do this
     * (so a buggy transport will immediately assert rather than
     * silently failing to migrate this state) */
    assert(k->has_variable_vring_alignment);

    if (!align) {
        return;
    }

    vdev->vq[n].vring.align = align;
    virtio_queue_update_rings(vdev, n);
}

1509
/* Run @vq's aio output handler if the queue is set up and has one.
 * Returns the handler's result, or false if nothing was run. */
static bool virtio_queue_notify_aio_vq(VirtQueue *vq)
{
    VirtIODevice *vdev = vq->vdev;

    if (!vq->vring.desc || !vq->handle_aio_output) {
        return false;
    }

    trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
    return vq->handle_aio_output(vdev, vq);
}

1521
/* Run @vq's output handler if the queue is set up, has a handler, and
 * the device is not broken. */
static void virtio_queue_notify_vq(VirtQueue *vq)
{
    VirtIODevice *vdev = vq->vdev;

    if (!vq->vring.desc || !vq->handle_output || unlikely(vdev->broken)) {
        return;
    }

    trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
    vq->handle_output(vdev, vq);
}

P
Paul Brook 已提交
1535 1536
/* Guest kick for queue @n.  Queues with an aio handler are kicked via
 * their host notifier; otherwise the output handler runs directly. */
void virtio_queue_notify(VirtIODevice *vdev, int n)
{
    VirtQueue *vq = &vdev->vq[n];

    if (unlikely(!vq->vring.desc || vdev->broken)) {
        return;
    }

    trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
    if (vq->handle_aio_output) {
        event_notifier_set(&vq->host_notifier);
        return;
    }
    if (vq->handle_output) {
        vq->handle_output(vdev, vq);
    }
}

1551 1552
/* MSI-X vector assigned to queue @n, or VIRTIO_NO_VECTOR for an
 * out-of-range queue index. */
uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
{
    if (n >= VIRTIO_QUEUE_MAX) {
        return VIRTIO_NO_VECTOR;
    }
    return vdev->vq[n].vector;
}

/* Bind queue @n to MSI-X @vector, keeping the per-vector queue lists
 * in sync when the transport provides them. */
void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
{
    VirtQueue *vq = &vdev->vq[n];

    if (n >= VIRTIO_QUEUE_MAX) {
        return;
    }

    /* Unlink from the old vector's list before relinking. */
    if (vdev->vector_queues && vq->vector != VIRTIO_NO_VECTOR) {
        QLIST_REMOVE(vq, node);
    }
    vq->vector = vector;
    if (vdev->vector_queues && vector != VIRTIO_NO_VECTOR) {
        QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
    }
}

1574 1575
/* Allocate the next free virtqueue slot with ring size @queue_size and
 * output handler @handle_output.  Aborts if all slots are taken or the
 * requested size exceeds VIRTQUEUE_MAX_SIZE. */
VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                            VirtIOHandleOutput handle_output)
{
    VirtQueue *vq = NULL;
    int i;

    /* Take the first unused slot; a ring size of 0 marks a free queue. */
    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0) {
            vq = &vdev->vq[i];
            break;
        }
    }

    if (!vq || queue_size > VIRTQUEUE_MAX_SIZE) {
        abort();
    }

    vq->vring.num = queue_size;
    vq->vring.num_default = queue_size;
    vq->vring.align = VIRTIO_PCI_VRING_ALIGN;
    vq->handle_output = handle_output;
    vq->handle_aio_output = NULL;

    return vq;
}

1596 1597
/* Remove queue @n by zeroing its ring sizes (0 marks a nonexistent
 * queue).  Aborts on an out-of-range index. */
void virtio_del_queue(VirtIODevice *vdev, int n)
{
    VRing *vring;

    if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
        abort();
    }

    vring = &vdev->vq[n].vring;
    vring->num = 0;
    vring->num_default = 0;
}

P
Paolo Bonzini 已提交
1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617
/* Atomically OR @value into the device's ISR status byte. */
static void virtio_set_isr(VirtIODevice *vdev, int value)
{
    uint8_t old = atomic_read(&vdev->isr);

    /* Do not write ISR if it does not change, so that its cacheline remains
     * shared in the common case where the guest does not read it.
     */
    if ((old & value) != value) {
        atomic_or(&vdev->isr, value);
    }
}

1618
/* Called within rcu_read_lock().  */
1619
static bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
M
Michael S. Tsirkin 已提交
1620 1621 1622
{
    uint16_t old, new;
    bool v;
1623 1624
    /* We need to expose used array entries before checking used event. */
    smp_mb();
1625
    /* Always notify when queue is empty (when feature acknowledge) */
1626
    if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
1627
        !vq->inuse && virtio_queue_empty(vq)) {
M
Michael S. Tsirkin 已提交
1628 1629 1630
        return true;
    }

1631
    if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
M
Michael S. Tsirkin 已提交
1632 1633 1634 1635 1636 1637
        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
    }

    v = vq->signalled_used_valid;
    vq->signalled_used_valid = true;
    old = vq->signalled_used;
1638
    new = vq->signalled_used = vq->used_idx;
1639
    return !v || vring_need_event(vring_get_used_event(vq), new, old);
M
Michael S. Tsirkin 已提交
1640 1641
}

1642 1643
/* Signal the guest for @vq through its guest notifier (irqfd path),
 * bypassing the transport's vector delivery. */
void virtio_notify_irqfd(VirtIODevice *vdev, VirtQueue *vq)
{
    bool should_notify;
    rcu_read_lock();
    should_notify = virtio_should_notify(vdev, vq);
    rcu_read_unlock();

    if (!should_notify) {
        return;
    }

    trace_virtio_notify_irqfd(vdev, vq);

    /*
     * virtio spec 1.0 says ISR bit 0 should be ignored with MSI, but
     * windows drivers included in virtio-win 1.8.0 (circa 2015) are
     * incorrectly polling this bit during crashdump and hibernation
     * in MSI mode, causing a hang if this bit is never updated.
     * Recent releases of Windows do not really shut down, but rather
     * log out and hibernate to make the next startup faster.  Hence,
     * this manifested as a more serious hang during shutdown with
     *
     * Next driver release from 2016 fixed this problem, so working around it
     * is not a must, but it's easy to do so let's do it here.
     *
     * Note: it's safe to update ISR from any thread as it was switched
     * to an atomic operation.
     */
    virtio_set_isr(vq->vdev, 0x1);
    event_notifier_set(&vq->guest_notifier);
}

1674 1675 1676 1677 1678 1679
/* Raise a queue interrupt: set ISR bit 0 and notify on the queue's vector. */
static void virtio_irq(VirtQueue *vq)
{
    virtio_set_isr(vq->vdev, 0x1);
    virtio_notify_vector(vq->vdev, vq->vector);
}

M
Michael S. Tsirkin 已提交
1680 1681
/* Interrupt the guest for @vq via the transport's vector delivery,
 * subject to the suppression rules in virtio_should_notify(). */
void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    bool notify;

    rcu_read_lock();
    notify = virtio_should_notify(vdev, vq);
    rcu_read_unlock();

    if (notify) {
        trace_virtio_notify(vdev, vq);
        virtio_irq(vq);
    }
}

/* Signal a configuration change: bump the config generation, set ISR
 * bits 0+1 and notify on the config vector.  No-op until the guest
 * driver has set DRIVER_OK. */
void virtio_notify_config(VirtIODevice *vdev)
{
    if (!(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) {
        return;
    }

    virtio_set_isr(vdev, 0x3);
    vdev->generation++;
    virtio_notify_vector(vdev, vdev->config_vector);
}

1705 1706 1707 1708 1709
static bool virtio_device_endian_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
1710
    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1711 1712 1713 1714
        return vdev->device_endian != virtio_default_endian();
    }
    /* Devices conforming to VIRTIO 1.0 or later are always LE. */
    return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
1715 1716
}

G
Gerd Hoffmann 已提交
1717 1718 1719 1720 1721 1722 1723
/* Subsection predicate: the device offers feature bits above bit 31. */
static bool virtio_64bit_features_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    return (vdev->host_features >> 32) != 0;
}

/* Subsection predicate: the host offers VIRTIO_F_VERSION_1. */
static bool virtio_virtqueue_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    return virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);
}

C
Cornelia Huck 已提交
1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743
/* Subsection predicate: at least one queue was resized away from its
 * default ring size. */
static bool virtio_ringsize_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;
    int i = 0;

    while (i < VIRTIO_QUEUE_MAX &&
           vdev->vq[i].vring.num == vdev->vq[i].vring.num_default) {
        i++;
    }
    return i < VIRTIO_QUEUE_MAX;
}

1744 1745 1746 1747 1748 1749 1750 1751 1752 1753
/* Subsection predicate: the transport declares extra state to migrate. */
static bool virtio_extra_state_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    return k->has_extra_state &&
        k->has_extra_state(qbus->parent);
}

/* Subsection predicate: the device was marked broken. */
static bool virtio_broken_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    return vdev->broken;
}

1761
/* Per-queue migration state for VIRTIO-1 devices: the split ring's avail
 * and used addresses (desc travels in the main save section). */
static const VMStateDescription vmstate_virtqueue = {
    .name = "virtqueue_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64(vring.avail, struct VirtQueue),
        VMSTATE_UINT64(vring.used, struct VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

/* Subsection present only when the host offers VIRTIO_F_VERSION_1. */
static const VMStateDescription vmstate_virtio_virtqueues = {
    .name = "virtio/virtqueues",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_virtqueue_needed,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
                      VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

1784
/* Per-queue default ring size. */
static const VMStateDescription vmstate_ringsize = {
    .name = "ringsize_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(vring.num_default, struct VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

/* Subsection present when any queue differs from its default ring size. */
static const VMStateDescription vmstate_virtio_ringsize = {
    .name = "virtio/ringsize",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_ringsize_needed,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
                      VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

J
Jianjun Duan 已提交
1806 1807
/* VMState .get handler: delegate loading transport-specific state.
 * Fails (-1) if the transport has no load_extra_state hook. */
static int get_extra_state(QEMUFile *f, void *pv, size_t size,
                           VMStateField *field)
{
    VirtIODevice *vdev = pv;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    if (k->load_extra_state) {
        return k->load_extra_state(qbus->parent, f);
    }
    return -1;
}

J
Jianjun Duan 已提交
1820 1821
/* VMState .put handler: delegate saving transport-specific state.
 * NOTE(review): save_extra_state is invoked unconditionally; the
 * subsection's .needed predicate only checks has_extra_state, so this
 * assumes transports that report extra state also implement the save
 * hook — confirm against the transports. */
static int put_extra_state(QEMUFile *f, void *pv, size_t size,
                           VMStateField *field, QJSON *vmdesc)
{
    VirtIODevice *vdev = pv;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    k->save_extra_state(qbus->parent, f);
    return 0;
}
}

/* Marshals opaque transport state through get/put_extra_state. */
static const VMStateInfo vmstate_info_extra_state = {
    .name = "virtqueue_extra_state",
    .get = get_extra_state,
    .put = put_extra_state,
};

/* Subsection carrying transport-specific ("extra") state. */
static const VMStateDescription vmstate_virtio_extra_state = {
    .name = "virtio/extra_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_extra_state_needed,
    .fields = (VMStateField[]) {
        {
            .name         = "extra_state",
            .version_id   = 0,
            .field_exists = NULL,
            .size         = 0,
            .info         = &vmstate_info_extra_state,
            .flags        = VMS_SINGLE,
            .offset       = 0,
        },
        VMSTATE_END_OF_LIST()
    }
};

1856 1857 1858 1859
/* Subsection: per-device endianness, saved when it differs from the
 * destination's default (see virtio_device_endian_needed). */
static const VMStateDescription vmstate_virtio_device_endian = {
    .name = "virtio/device_endian",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_device_endian_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(device_endian, VirtIODevice),
        VMSTATE_END_OF_LIST()
    }
};

/* Subsection: full 64-bit guest_features, needed when the device offers
 * feature bits above bit 31 (the main section saves only the low word). */
static const VMStateDescription vmstate_virtio_64bit_features = {
    .name = "virtio/64bit_features",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_64bit_features_needed,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64(guest_features, VirtIODevice),
        VMSTATE_END_OF_LIST()
    }
};

/* Subsection: the "device is broken" flag. */
static const VMStateDescription vmstate_virtio_broken = {
    .name = "virtio/broken",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_broken_needed,
    .fields = (VMStateField[]) {
        VMSTATE_BOOL(broken, VirtIODevice),
        VMSTATE_END_OF_LIST()
    }
};

/* Top-level "virtio" vmstate: no fixed fields of its own; all data lives
 * in the optional subsections gated by the predicates above. */
static const VMStateDescription vmstate_virtio = {
    .name = "virtio",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .fields = (VMStateField[]) {
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_virtio_device_endian,
        &vmstate_virtio_64bit_features,
        &vmstate_virtio_virtqueues,
        &vmstate_virtio_ringsize,
        &vmstate_virtio_broken,
        &vmstate_virtio_extra_state,
        NULL
    }
};

1908
/*
 * Serialize the common virtio device state to @f.
 *
 * Layout: transport config (optional), status/isr/queue_sel, low 32
 * feature bits, device config blob, queue count, then per-queue state,
 * then the device-specific state (legacy vdc->save or vdc->vmsd), and
 * finally the vmstate_virtio subsections.
 *
 * Returns 0 on success or a negative error from vmstate_save_state().
 */
int virtio_save(VirtIODevice *vdev, QEMUFile *f)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
    int i;

    if (k->save_config) {
        k->save_config(qbus->parent, f);
    }

    qemu_put_8s(f, &vdev->status);
    qemu_put_8s(f, &vdev->isr);
    qemu_put_be16s(f, &vdev->queue_sel);
    /* Only the low 32 feature bits here; the high bits go in a subsection. */
    qemu_put_be32s(f, &guest_features_lo);
    qemu_put_be32(f, vdev->config_len);
    qemu_put_buffer(f, vdev->config, vdev->config_len);

    /* Count configured queues: the first zero-sized vring ends the list. */
    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0) {
            break;
        }
    }

    qemu_put_be32(f, i);

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0) {
            break;
        }

        qemu_put_be32(f, vdev->vq[i].vring.num);
        if (k->has_variable_vring_alignment) {
            qemu_put_be32(f, vdev->vq[i].vring.align);
        }
        /*
         * Save desc now, the rest of the ring addresses are saved in
         * subsections for VIRTIO-1 devices.
         */
        qemu_put_be64(f, vdev->vq[i].vring.desc);
        qemu_put_be16s(f, &vdev->vq[i].last_avail_idx);
        if (k->save_queue) {
            k->save_queue(qbus->parent, i, f);
        }
    }

    if (vdc->save != NULL) {
        vdc->save(vdev, f);
    }

    if (vdc->vmsd) {
        int ret = vmstate_save_state(f, vdc->vmsd, vdev, NULL);
        if (ret) {
            return ret;
        }
    }

    /* Subsections */
    return vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
}

1968
/* A wrapper for use as a VMState .put function */
J
Jianjun Duan 已提交
1969 1970
/* A wrapper for use as a VMState .put function; defers to virtio_save(). */
static int virtio_device_put(QEMUFile *f, void *opaque, size_t size,
                              VMStateField *field, QJSON *vmdesc)
{
    return virtio_save(VIRTIO_DEVICE(opaque), f);
}

/* A wrapper for use as a VMState .get function */
J
Jianjun Duan 已提交
1976 1977
/*
 * A wrapper for use as a VMState .get function; the version id comes
 * from the device class's own vmsd.
 */
static int virtio_device_get(QEMUFile *f, void *opaque, size_t size,
                             VMStateField *field)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(opaque);
    DeviceClass *dc = DEVICE_CLASS(VIRTIO_DEVICE_GET_CLASS(vdev));

    return virtio_load(vdev, f, dc->vmsd->version_id);
}

/*
 * VMStateInfo so a virtio device can be embedded inside another
 * device's vmsd; routes through virtio_device_get()/virtio_device_put().
 */
const VMStateInfo  virtio_vmstate_info = {
    .name = "virtio",
    .get = virtio_device_get,
    .put = virtio_device_put,
};

1991
/*
 * Apply a guest feature selection without the FEATURES_OK guard.
 * Bits the host did not offer are masked off before being applied and
 * stored in vdev->guest_features.  Returns 0 if every requested bit
 * was offered, -1 if any had to be dropped.
 */
static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint64_t accepted = val & vdev->host_features;
    bool bad = accepted != val;

    if (k->set_features) {
        k->set_features(vdev, accepted);
    }
    vdev->guest_features = accepted;
    return bad ? -1 : 0;
}

2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015
/*
 * Guest-initiated feature negotiation entry point.
 * Returns 0 on success, -EINVAL if negotiation is already locked,
 * or -1 (from virtio_set_features_nocheck) if unsupported bits were
 * requested.
 */
int virtio_set_features(VirtIODevice *vdev, uint64_t val)
{
    /*
     * The driver must not attempt to set features after feature negotiation
     * has finished.
     */
    if (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) {
        return -EINVAL;
    }
    return virtio_set_features_nocheck(vdev, val);
}

2016
/*
 * Restore common virtio device state from @f (inverse of virtio_save()).
 *
 * Returns 0 on success, -1 on consistency failures, or a negative error
 * propagated from the transport/device load callbacks.
 *
 * Fix: the ring-consistency checks at the end run inside an RCU
 * read-side critical section; the error paths previously returned
 * without calling rcu_read_unlock(), leaving the read lock held
 * forever.  They now unlock before returning.
 */
int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
{
    int i, ret;
    int32_t config_len;
    uint32_t num;
    uint32_t features;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);

    /*
     * We poison the endianness to ensure it does not get used before
     * subsections have been loaded.
     */
    vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;

    if (k->load_config) {
        ret = k->load_config(qbus->parent, f);
        if (ret)
            return ret;
    }

    qemu_get_8s(f, &vdev->status);
    qemu_get_8s(f, &vdev->isr);
    qemu_get_be16s(f, &vdev->queue_sel);
    if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
        return -1;
    }
    qemu_get_be32s(f, &features);

    /*
     * Temporarily set guest_features low bits - needed by
     * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
     * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
     *
     * Note: devices should always test host features in future - don't create
     * new dependencies like this.
     */
    vdev->guest_features = features;

    config_len = qemu_get_be32(f);

    /*
     * There are cases where the incoming config can be bigger or smaller
     * than what we have; so load what we have space for, and skip
     * any excess that's in the stream.
     */
    qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));

    while (config_len > vdev->config_len) {
        qemu_get_byte(f);
        config_len--;
    }

    num = qemu_get_be32(f);

    if (num > VIRTIO_QUEUE_MAX) {
        error_report("Invalid number of virtqueues: 0x%x", num);
        return -1;
    }

    for (i = 0; i < num; i++) {
        vdev->vq[i].vring.num = qemu_get_be32(f);
        if (k->has_variable_vring_alignment) {
            vdev->vq[i].vring.align = qemu_get_be32(f);
        }
        vdev->vq[i].vring.desc = qemu_get_be64(f);
        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
        vdev->vq[i].signalled_used_valid = false;
        vdev->vq[i].notification = true;

        if (!vdev->vq[i].vring.desc && vdev->vq[i].last_avail_idx) {
            error_report("VQ %d address 0x0 "
                         "inconsistent with Host index 0x%x",
                         i, vdev->vq[i].last_avail_idx);
            return -1;
        }
        if (k->load_queue) {
            ret = k->load_queue(qbus->parent, i, f);
            if (ret)
                return ret;
        }
    }

    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);

    if (vdc->load != NULL) {
        ret = vdc->load(vdev, f, version_id);
        if (ret) {
            return ret;
        }
    }

    if (vdc->vmsd) {
        ret = vmstate_load_state(f, vdc->vmsd, vdev, version_id);
        if (ret) {
            return ret;
        }
    }

    /* Subsections */
    ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
    if (ret) {
        return ret;
    }

    if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
        vdev->device_endian = virtio_default_endian();
    }

    if (virtio_64bit_features_needed(vdev)) {
        /*
         * Subsection load filled vdev->guest_features.  Run them
         * through virtio_set_features to sanity-check them against
         * host_features.
         */
        uint64_t features64 = vdev->guest_features;
        if (virtio_set_features_nocheck(vdev, features64) < 0) {
            error_report("Features 0x%" PRIx64 " unsupported. "
                         "Allowed features: 0x%" PRIx64,
                         features64, vdev->host_features);
            return -1;
        }
    } else {
        if (virtio_set_features_nocheck(vdev, features) < 0) {
            error_report("Features 0x%x unsupported. "
                         "Allowed features: 0x%" PRIx64,
                         features, vdev->host_features);
            return -1;
        }
    }

    rcu_read_lock();
    for (i = 0; i < num; i++) {
        if (vdev->vq[i].vring.desc) {
            uint16_t nheads;

            /*
             * VIRTIO-1 devices migrate desc, used, and avail ring addresses so
             * only the region cache needs to be set up.  Legacy devices need
             * to calculate used and avail ring addresses based on the desc
             * address.
             */
            if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
                virtio_init_region_cache(vdev, i);
            } else {
                virtio_queue_update_rings(vdev, i);
            }

            nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
            /* Check it isn't doing strange things with descriptor numbers. */
            if (nheads > vdev->vq[i].vring.num) {
                error_report("VQ %d size 0x%x Guest index 0x%x "
                             "inconsistent with Host index 0x%x: delta 0x%x",
                             i, vdev->vq[i].vring.num,
                             vring_avail_idx(&vdev->vq[i]),
                             vdev->vq[i].last_avail_idx, nheads);
                /* Don't leak the RCU read lock on the error path. */
                rcu_read_unlock();
                return -1;
            }
            vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
            vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);

            /*
             * Some devices migrate VirtQueueElements that have been popped
             * from the avail ring but not yet returned to the used ring.
             * Since max ring size < UINT16_MAX it's safe to use modulo
             * UINT16_MAX + 1 subtraction.
             */
            vdev->vq[i].inuse = (uint16_t)(vdev->vq[i].last_avail_idx -
                                vdev->vq[i].used_idx);
            if (vdev->vq[i].inuse > vdev->vq[i].vring.num) {
                error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
                             "used_idx 0x%x",
                             i, vdev->vq[i].vring.num,
                             vdev->vq[i].last_avail_idx,
                             vdev->vq[i].used_idx);
                /* Don't leak the RCU read lock on the error path. */
                rcu_read_unlock();
                return -1;
            }
        }
    }
    rcu_read_unlock();

    return 0;
}

2201
/*
 * Tear down the common virtio state: unregister the vmstate change
 * handler that virtio_init() installed.
 */
void virtio_cleanup(VirtIODevice *vdev)
{
    qemu_del_vm_change_state_handler(vdev->vmstate);
}

2206
/*
 * VM run-state callback: propagate start/stop to the device and the
 * transport.  The status write happens before the transport callback
 * when starting, after it when stopping.
 */
static void virtio_vmstate_change(void *opaque, int running, RunState state)
{
    VirtIODevice *vdev = opaque;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);

    vdev->vm_running = running;

    if (backend_run) {
        virtio_set_status(vdev, vdev->status);
    }

    if (k->vmstate_change) {
        k->vmstate_change(qbus->parent, backend_run);
    }

    if (!backend_run) {
        virtio_set_status(vdev, vdev->status);
    }
}

2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237
/*
 * Helper for transport proxies: initialize the embedded backend device
 * @data of type @vdev_name, attach it as the proxy's "virtio-backend"
 * child, and alias its properties onto the proxy object.
 */
void virtio_instance_init_common(Object *proxy_obj, void *data,
                                 size_t vdev_size, const char *vdev_name)
{
    DeviceState *vdev = data;

    object_initialize(vdev, vdev_size, vdev_name);
    object_property_add_child(proxy_obj, "virtio-backend", OBJECT(vdev), NULL);
    /* Drop our reference; the child property keeps the backend alive. */
    object_unref(OBJECT(vdev));
    qdev_alias_all_properties(vdev, proxy_obj);
}

2238 2239
void virtio_init(VirtIODevice *vdev, const char *name,
                 uint16_t device_id, size_t config_size)
A
aliguori 已提交
2240
{
2241 2242
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
2243
    int i;
2244 2245 2246 2247 2248 2249 2250
    int nvectors = k->query_nvectors ? k->query_nvectors(qbus->parent) : 0;

    if (nvectors) {
        vdev->vector_queues =
            g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
    }

P
Paul Brook 已提交
2251
    vdev->device_id = device_id;
A
aliguori 已提交
2252
    vdev->status = 0;
P
Paolo Bonzini 已提交
2253
    atomic_set(&vdev->isr, 0);
A
aliguori 已提交
2254
    vdev->queue_sel = 0;
2255
    vdev->config_vector = VIRTIO_NO_VECTOR;
2256
    vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
2257
    vdev->vm_running = runstate_is_running();
2258
    vdev->broken = false;
2259
    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
2260
        vdev->vq[i].vector = VIRTIO_NO_VECTOR;
2261
        vdev->vq[i].vdev = vdev;
2262
        vdev->vq[i].queue_index = i;
2263
    }
A
aliguori 已提交
2264 2265 2266

    vdev->name = name;
    vdev->config_len = config_size;
2267
    if (vdev->config_len) {
2268
        vdev->config = g_malloc0(config_size);
2269
    } else {
A
aliguori 已提交
2270
        vdev->config = NULL;
2271 2272 2273
    }
    vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
                                                     vdev);
2274
    vdev->device_endian = virtio_default_endian();
2275
    vdev->use_guest_notifier_mask = true;
2276
}
A
aliguori 已提交
2277

A
Avi Kivity 已提交
2278
/* Guest-physical address of queue @n's descriptor table. */
hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.desc;
}

A
Avi Kivity 已提交
2283
/* Guest-physical address of queue @n's avail ring. */
hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.avail;
}

A
Avi Kivity 已提交
2288
/* Guest-physical address of queue @n's used ring. */
hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].vring.used;
}

A
Avi Kivity 已提交
2293
/* Byte size of queue @n's descriptor table: one VRingDesc per entry. */
hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
{
    return sizeof(VRingDesc) * vdev->vq[n].vring.num;
}

A
Avi Kivity 已提交
2298
/*
 * Byte size of queue @n's avail ring: the VRingAvail header plus one
 * 16-bit ring entry per descriptor.
 */
hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
{
    return offsetof(VRingAvail, ring) +
        sizeof(uint16_t) * vdev->vq[n].vring.num;
}

A
Avi Kivity 已提交
2304
/*
 * Byte size of queue @n's used ring: the VRingUsed header plus one
 * VRingUsedElem per descriptor.
 */
hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
{
    return offsetof(VRingUsed, ring) +
        sizeof(VRingUsedElem) * vdev->vq[n].vring.num;
}

/* Host-side consumed index for queue @n. */
uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
{
    return vdev->vq[n].last_avail_idx;
}

/*
 * Force queue @n's consumed index to @idx, keeping the shadow copy of
 * the avail index in sync with it.
 */
void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
{
    vdev->vq[n].last_avail_idx = idx;
    vdev->vq[n].shadow_avail_idx = idx;
}

2321 2322 2323 2324 2325 2326 2327 2328 2329 2330
/*
 * Reset queue @n's consumed index (and its shadow) from the used ring
 * index in guest memory.  No-op if the queue has no descriptor table.
 */
void virtio_queue_restore_last_avail_idx(VirtIODevice *vdev, int n)
{
    rcu_read_lock();
    if (vdev->vq[n].vring.desc) {
        vdev->vq[n].last_avail_idx = vring_used_idx(&vdev->vq[n]);
        vdev->vq[n].shadow_avail_idx = vdev->vq[n].last_avail_idx;
    }
    rcu_read_unlock();
}

2331 2332
/*
 * Refresh the cached used-ring index of queue @n from guest memory.
 * No-op if the queue has no descriptor table.
 */
void virtio_queue_update_used_idx(VirtIODevice *vdev, int n)
{
    rcu_read_lock();
    if (vdev->vq[n].vring.desc) {
        vdev->vq[n].used_idx = vring_used_idx(&vdev->vq[n]);
    }
    rcu_read_unlock();
}

2340 2341 2342 2343 2344
/* Invalidate the cached signalled-used value for queue @n. */
void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
{
    vdev->vq[n].signalled_used_valid = false;
}

2345 2346 2347 2348 2349
/* Pointer to the @n-th virtqueue of @vdev. */
VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
{
    return vdev->vq + n;
}

2350 2351 2352 2353 2354
/* Index of @vq within its device's virtqueue array. */
uint16_t virtio_get_queue_index(VirtQueue *vq)
{
    return vq->queue_index;
}

2355 2356 2357 2358
/* Guest notifier handler: raise the queue's interrupt when signalled. */
static void virtio_queue_guest_notifier_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);

    if (event_notifier_test_and_clear(n)) {
        virtio_irq(vq);
    }
}

/*
 * (Un)install the fd handler for @vq's guest notifier.  When irqfd is
 * in use no userspace handler is needed; on deassign, any pending
 * event is drained so interrupts are not lost.
 */
void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
                                                bool with_irqfd)
{
    EventNotifier *notifier = &vq->guest_notifier;

    if (assign && !with_irqfd) {
        event_notifier_set_handler(notifier,
                                   virtio_queue_guest_notifier_read);
    } else {
        event_notifier_set_handler(notifier, NULL);
    }
    if (!assign) {
        /* Test and clear notifier before closing it,
         * in case poll callback didn't have time to run. */
        virtio_queue_guest_notifier_read(notifier);
    }
}

2379 2380 2381 2382
/* Accessor for @vq's guest notifier. */
EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
{
    return &vq->guest_notifier;
}
2383

M
Michael S. Tsirkin 已提交
2384
/* AioContext host notifier handler: run the queue's AIO output handler. */
static void virtio_queue_host_notifier_aio_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);

    if (event_notifier_test_and_clear(n)) {
        virtio_queue_notify_aio_vq(vq);
    }
}

2392 2393 2394 2395 2396 2397 2398
/* Entering poll mode: suppress guest notifications while we busy-poll. */
static void virtio_queue_host_notifier_aio_poll_begin(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);

    virtio_queue_set_notification(vq, 0);
}

2399 2400 2401 2402
static bool virtio_queue_host_notifier_aio_poll(void *opaque)
{
    EventNotifier *n = opaque;
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
2403
    bool progress;
2404

2405
    if (!vq->vring.desc || virtio_queue_empty(vq)) {
2406 2407 2408
        return false;
    }

2409
    progress = virtio_queue_notify_aio_vq(vq);
2410 2411 2412

    /* In case the handler function re-enabled notifications */
    virtio_queue_set_notification(vq, 0);
2413
    return progress;
2414 2415
}

2416 2417 2418 2419 2420 2421 2422 2423
/* Leaving poll mode: re-enable guest notifications. */
static void virtio_queue_host_notifier_aio_poll_end(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);

    /* Caller polls once more after this to catch requests that race with us */
    virtio_queue_set_notification(vq, 1);
}

2424
void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
2425
                                                VirtIOHandleAIOOutput handle_output)
2426
{
2427 2428
    if (handle_output) {
        vq->handle_aio_output = handle_output;
2429
        aio_set_event_notifier(ctx, &vq->host_notifier, true,
2430 2431
                               virtio_queue_host_notifier_aio_read,
                               virtio_queue_host_notifier_aio_poll);
2432 2433 2434
        aio_set_event_notifier_poll(ctx, &vq->host_notifier,
                                    virtio_queue_host_notifier_aio_poll_begin,
                                    virtio_queue_host_notifier_aio_poll_end);
2435
    } else {
2436
        aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL, NULL);
2437 2438
        /* Test and clear notifier before after disabling event,
         * in case poll callback didn't have time to run. */
M
Michael S. Tsirkin 已提交
2439
        virtio_queue_host_notifier_aio_read(&vq->host_notifier);
2440
        vq->handle_aio_output = NULL;
M
Michael S. Tsirkin 已提交
2441 2442 2443
    }
}

2444
/* Main-loop host notifier handler: run the queue's output handler. */
void virtio_queue_host_notifier_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);

    if (event_notifier_test_and_clear(n)) {
        virtio_queue_notify_vq(vq);
    }
}

2452 2453 2454 2455
/* Accessor for @vq's host notifier. */
EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
{
    return &vq->host_notifier;
}
2456

2457 2458
/*
 * Replace the device's child bus name with a copy of @bus_name,
 * releasing any previously stored name.
 */
void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
{
    g_free(vdev->bus_name);
    vdev->bus_name = g_strdup(bus_name);
}

2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474
/*
 * Report a fatal device error and mark the device broken.  For
 * VIRTIO-1 devices, also set NEEDS_RESET in the status field and
 * notify the guest of the config change, per the spec's error
 * recovery mechanism.
 */
void GCC_FMT_ATTR(2, 3) virtio_error(VirtIODevice *vdev, const char *fmt, ...)
{
    va_list ap;

    va_start(ap, fmt);
    error_vreport(fmt, ap);
    va_end(ap);

    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        virtio_set_status(vdev, vdev->status | VIRTIO_CONFIG_S_NEEDS_RESET);
        virtio_notify_config(vdev);
    }

    vdev->broken = true;
}

2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491
/*
 * Memory listener commit hook: the guest memory map changed, so rebuild
 * the region caches of every configured queue (stops at the first
 * zero-sized vring).
 */
static void virtio_memory_listener_commit(MemoryListener *listener)
{
    VirtIODevice *vdev = container_of(listener, VirtIODevice, listener);
    int i;

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0) {
            break;
        }
        virtio_init_region_cache(vdev, i);
    }
}

2492 2493 2494 2495 2496 2497
/*
 * Base-class realize: run the subclass realize hook, plug the device
 * into its transport, then register the memory listener that keeps the
 * vring region caches up to date.  Unrealizes the subclass if plugging
 * fails.
 */
static void virtio_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
    Error *err = NULL;

    /* Devices should either use vmsd or the load/save methods */
    assert(!vdc->vmsd || !vdc->load);

    if (vdc->realize != NULL) {
        vdc->realize(dev, &err);
        if (err != NULL) {
            error_propagate(errp, err);
            return;
        }
    }

    virtio_bus_device_plugged(vdev, &err);
    if (err != NULL) {
        error_propagate(errp, err);
        vdc->unrealize(dev, NULL);
        return;
    }

    vdev->listener.commit = virtio_memory_listener_commit;
    memory_listener_register(&vdev->listener, vdev->dma_as);
}

2520
/*
 * Base-class unrealize: unplug from the transport, run the subclass
 * unrealize hook, then release the child bus name.
 */
static void virtio_device_unrealize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
    Error *err = NULL;

    virtio_bus_device_unplugged(vdev);

    if (vdc->unrealize != NULL) {
        vdc->unrealize(dev, &err);
        if (err != NULL) {
            error_propagate(errp, err);
            return;
        }
    }

    g_free(vdev->bus_name);
    vdev->bus_name = NULL;
}

2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550
/*
 * Free the device's virtqueue array, dropping each configured queue's
 * vring region cache first.  Safe to call when no queues were ever
 * allocated.
 */
static void virtio_device_free_virtqueues(VirtIODevice *vdev)
{
    int i;

    if (!vdev->vq) {
        return;
    }

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0) {
            break;
        }
        virtio_virtqueue_reset_region_cache(&vdev->vq[i]);
    }
    g_free(vdev->vq);
}

/*
 * QOM instance finalizer: release everything virtio_init() and
 * virtio_device_realize() allocated on the device.
 */
static void virtio_device_instance_finalize(Object *obj)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(obj);

    memory_listener_unregister(&vdev->listener);
    virtio_device_free_virtqueues(vdev);

    g_free(vdev->config);
    g_free(vdev->vector_queues);
}

C
Cornelia Huck 已提交
2567 2568 2569 2570 2571
/* Properties shared by all virtio devices: the common feature bits. */
static Property virtio_properties[] = {
    DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
    DEFINE_PROP_END_OF_LIST(),
};

2572 2573 2574 2575 2576 2577
/*
 * Default start_ioeventfd implementation: assign a host notifier and
 * read handler to every configured queue, then kick each one so
 * requests already present in the vrings get processed.  On failure,
 * roll back the queues assigned so far and return the first error.
 */
static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev)
{
    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
    int n, r, err;

    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
        VirtQueue *vq = &vdev->vq[n];

        if (!virtio_queue_get_num(vdev, n)) {
            continue;
        }
        r = virtio_bus_set_host_notifier(qbus, n, true);
        if (r < 0) {
            err = r;
            goto assign_error;
        }
        event_notifier_set_handler(&vq->host_notifier,
                                   virtio_queue_host_notifier_read);
    }

    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
        /* Kick right away to begin processing requests already in vring */
        VirtQueue *vq = &vdev->vq[n];

        if (!vq->vring.num) {
            continue;
        }
        event_notifier_set(&vq->host_notifier);
    }
    return 0;

assign_error:
    /* Undo the assignments made before the failure. */
    while (--n >= 0) {
        VirtQueue *vq = &vdev->vq[n];

        if (!virtio_queue_get_num(vdev, n)) {
            continue;
        }

        event_notifier_set_handler(&vq->host_notifier, NULL);
        r = virtio_bus_set_host_notifier(qbus, n, false);
        assert(r >= 0);
    }
    return err;
}

/* Public entry point: start ioeventfd handling via the transport bus. */
int virtio_device_start_ioeventfd(VirtIODevice *vdev)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);

    return virtio_bus_start_ioeventfd(vbus);
}

/*
 * Default stop_ioeventfd implementation: remove the read handler and
 * host notifier from every configured queue.
 */
static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev)
{
    VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev)));
    int n, r;

    for (n = 0; n < VIRTIO_QUEUE_MAX; n++) {
        VirtQueue *vq = &vdev->vq[n];

        if (!virtio_queue_get_num(vdev, n)) {
            continue;
        }
        event_notifier_set_handler(&vq->host_notifier, NULL);
        r = virtio_bus_set_host_notifier(qbus, n, false);
        assert(r >= 0);
    }
}

/* Public entry point: stop ioeventfd handling via the transport bus. */
void virtio_device_stop_ioeventfd(VirtIODevice *vdev)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);

    virtio_bus_stop_ioeventfd(vbus);
}

2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663
/* Take exclusive use of the transport's ioeventfd (see virtio-bus). */
int virtio_device_grab_ioeventfd(VirtIODevice *vdev)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);

    return virtio_bus_grab_ioeventfd(vbus);
}

/* Release the exclusive ioeventfd use taken by virtio_device_grab_ioeventfd(). */
void virtio_device_release_ioeventfd(VirtIODevice *vdev)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);

    virtio_bus_release_ioeventfd(vbus);
}

2664 2665 2666
/*
 * Class initializer for the abstract virtio device type: wire up the
 * qdev lifecycle hooks, the default ioeventfd implementations, and the
 * default set of legacy feature bits.
 */
static void virtio_device_class_init(ObjectClass *klass, void *data)
{
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass);
    DeviceClass *dc = DEVICE_CLASS(klass);

    dc->realize = virtio_device_realize;
    dc->unrealize = virtio_device_unrealize;
    dc->bus_type = TYPE_VIRTIO_BUS;
    dc->props = virtio_properties;

    vdc->start_ioeventfd = virtio_device_start_ioeventfd_impl;
    vdc->stop_ioeventfd = virtio_device_stop_ioeventfd_impl;

    /* Set the default value here. */
    vdc->legacy_features |= VIRTIO_LEGACY_FEATURES;
}

2680 2681 2682 2683 2684 2685 2686 2687
/* Query whether the transport bus has ioeventfd enabled for this device. */
bool virtio_device_ioeventfd_enabled(VirtIODevice *vdev)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusState *vbus = VIRTIO_BUS(qbus);

    return virtio_bus_ioeventfd_enabled(vbus);
}

2688 2689 2690 2691 2692
static const TypeInfo virtio_device_info = {
    .name = TYPE_VIRTIO_DEVICE,
    .parent = TYPE_DEVICE,
    .instance_size = sizeof(VirtIODevice),
    .class_init = virtio_device_class_init,
2693
    .instance_finalize = virtio_device_instance_finalize,
2694 2695 2696 2697 2698 2699 2700 2701 2702 2703
    .abstract = true,
    .class_size = sizeof(VirtioDeviceClass),
};

/* Register the virtio base type with the QOM type system. */
static void virtio_register_types(void)
{
    type_register_static(&virtio_device_info);
}

type_init(virtio_register_types)