virtio.c 51.9 KB
Newer Older
A
aliguori 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
/*
 * Virtio Support
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

P
Peter Maydell 已提交
14
#include "qemu/osdep.h"
15
#include "qapi/error.h"
16 17
#include "qemu-common.h"
#include "cpu.h"
18
#include "trace.h"
19
#include "exec/address-spaces.h"
20
#include "qemu/error-report.h"
P
Paolo Bonzini 已提交
21
#include "hw/virtio/virtio.h"
22
#include "qemu/atomic.h"
P
Paolo Bonzini 已提交
23
#include "hw/virtio/virtio-bus.h"
24
#include "migration/migration.h"
25
#include "hw/virtio/virtio-access.h"
A
aliguori 已提交
26

27 28 29 30 31
/*
 * The alignment to use between consumer and producer parts of vring.
 * x86 pagesize again. This is the default, used by transports like PCI
 * which don't provide a means for the guest to tell the host the alignment.
 */
32 33
#define VIRTIO_PCI_VRING_ALIGN         4096

A
aliguori 已提交
34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
/* One entry of the descriptor table, as laid out in guest memory. */
typedef struct VRingDesc {
    uint64_t addr;   /* guest address of the buffer (or indirect table) */
    uint32_t len;    /* length in bytes */
    uint16_t flags;  /* VRING_DESC_F_* bits */
    uint16_t next;   /* index of the chained descriptor, if F_NEXT is set */
} VRingDesc;

/*
 * Header of the avail ring as laid out in guest memory.  The ring entries
 * follow the two fixed fields; the struct is only used for offsetof()
 * computations, so the trailing array is a C99 flexible array member
 * rather than a zero-length array (a GNU extension).
 */
typedef struct VRingAvail
{
    uint16_t flags;
    uint16_t idx;
    uint16_t ring[];
} VRingAvail;

/* One entry of the used ring, as laid out in guest memory. */
typedef struct VRingUsedElem {
    uint32_t id;   /* head index of the completed element */
    uint32_t len;  /* length value reported for the element */
} VRingUsedElem;

/*
 * Header of the used ring as laid out in guest memory.  Only used for
 * offsetof() computations, so the trailing array is a C99 flexible array
 * member rather than a zero-length array (a GNU extension).
 */
typedef struct VRingUsed
{
    uint16_t flags;
    uint16_t idx;
    VRingUsedElem ring[];
} VRingUsed;

typedef struct VRing
{
    unsigned int num;
C
Cornelia Huck 已提交
65
    unsigned int num_default;
66
    unsigned int align;
A
Avi Kivity 已提交
67 68 69
    hwaddr desc;
    hwaddr avail;
    hwaddr used;
A
aliguori 已提交
70 71 72 73 74
} VRing;

struct VirtQueue
{
    VRing vring;
75 76

    /* Next head to pop */
A
aliguori 已提交
77
    uint16_t last_avail_idx;
78

79 80 81
    /* Last avail_idx read from VQ. */
    uint16_t shadow_avail_idx;

82 83
    uint16_t used_idx;

M
Michael S. Tsirkin 已提交
84 85 86 87 88 89 90 91 92
    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Last used index value we have signalled on */
    bool signalled_used_valid;

    /* Notification enabled? */
    bool notification;

93 94
    uint16_t queue_index;

A
aliguori 已提交
95
    int inuse;
M
Michael S. Tsirkin 已提交
96

97
    uint16_t vector;
A
aliguori 已提交
98
    void (*handle_output)(VirtIODevice *vdev, VirtQueue *vq);
M
Michael S. Tsirkin 已提交
99
    void (*handle_aio_output)(VirtIODevice *vdev, VirtQueue *vq);
100 101 102
    VirtIODevice *vdev;
    EventNotifier guest_notifier;
    EventNotifier host_notifier;
103
    QLIST_ENTRY(VirtQueue) node;
A
aliguori 已提交
104 105 106
};

/* virt queue functions */
107
/*
 * Recompute the avail and used ring addresses of queue @n from its
 * descriptor table address, entry count and alignment.
 */
void virtio_queue_update_rings(VirtIODevice *vdev, int n)
{
    VRing *r = &vdev->vq[n].vring;
    hwaddr avail_end;

    /* A zero descriptor address means the ring is not set up yet. */
    if (r->desc == 0) {
        return;
    }

    /* The avail ring starts right after the descriptor table ... */
    r->avail = r->desc + r->num * sizeof(VRingDesc);

    /* ... and the used ring at the next aligned address past it. */
    avail_end = r->avail + offsetof(VRingAvail, ring[r->num]);
    r->used = vring_align(avail_end, r->align);
}

121 122
/*
 * Copy descriptor number @i of the table at guest address @desc_pa into
 * @desc, then run every field through the virtio_tswap helpers.
 */
static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc,
                            hwaddr desc_pa, int i)
{
    hwaddr entry_pa = desc_pa + i * sizeof(VRingDesc);

    address_space_read(&address_space_memory, entry_pa,
                       MEMTXATTRS_UNSPECIFIED, (void *)desc, sizeof(*desc));

    virtio_tswap64s(vdev, &desc->addr);
    virtio_tswap32s(vdev, &desc->len);
    virtio_tswap16s(vdev, &desc->flags);
    virtio_tswap16s(vdev, &desc->next);
}

/* Load the flags field of the avail ring from guest memory. */
static inline uint16_t vring_avail_flags(VirtQueue *vq)
{
    return virtio_lduw_phys(vq->vdev,
                            vq->vring.avail + offsetof(VRingAvail, flags));
}

/*
 * Load the idx field of the avail ring from guest memory, remember it in
 * vq->shadow_avail_idx and return it.
 */
static inline uint16_t vring_avail_idx(VirtQueue *vq)
{
    hwaddr idx_pa = vq->vring.avail + offsetof(VRingAvail, idx);
    uint16_t avail = virtio_lduw_phys(vq->vdev, idx_pa);

    vq->shadow_avail_idx = avail;
    return avail;
}

/* Load entry @i of the avail ring from guest memory. */
static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
{
    hwaddr slot_pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);

    return virtio_lduw_phys(vq->vdev, slot_pa);
}

154
/* Load the 16-bit value stored at avail ring index vring.num. */
static inline uint16_t vring_get_used_event(VirtQueue *vq)
{
    int slot = vq->vring.num;

    return vring_avail_ring(vq, slot);
}

159 160
/*
 * Store @uelem into entry @i of the used ring in guest memory.
 * Note that @uelem itself is byte-swapped in place before being written.
 */
static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
                                    int i)
{
    hwaddr slot_pa = vq->vring.used + offsetof(VRingUsed, ring[i]);

    virtio_tswap32s(vq->vdev, &uelem->id);
    virtio_tswap32s(vq->vdev, &uelem->len);

    address_space_write(&address_space_memory, slot_pa,
                        MEMTXATTRS_UNSPECIFIED,
                        (void *)uelem, sizeof(*uelem));
}

/* Load the idx field of the used ring from guest memory. */
static uint16_t vring_used_idx(VirtQueue *vq)
{
    return virtio_lduw_phys(vq->vdev,
                            vq->vring.used + offsetof(VRingUsed, idx));
}

M
Michael S. Tsirkin 已提交
177
/*
 * Store @val into the idx field of the used ring in guest memory and
 * mirror it in vq->used_idx.
 */
static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
{
    hwaddr idx_pa = vq->vring.used + offsetof(VRingUsed, idx);

    virtio_stw_phys(vq->vdev, idx_pa, val);
    vq->used_idx = val;
}

/* Read-modify-write: OR @mask into the used ring's flags field. */
static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
{
    VirtIODevice *dev = vq->vdev;
    hwaddr flags_pa = vq->vring.used + offsetof(VRingUsed, flags);

    virtio_stw_phys(dev, flags_pa, virtio_lduw_phys(dev, flags_pa) | mask);
}

/* Read-modify-write: clear the bits of @mask in the used ring's flags field. */
static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
{
    VirtIODevice *dev = vq->vdev;
    hwaddr flags_pa = vq->vring.used + offsetof(VRingUsed, flags);

    virtio_stw_phys(dev, flags_pa, virtio_lduw_phys(dev, flags_pa) & ~mask);
}

201
/*
 * Store @val in the 16-bit slot at used ring index vring.num.
 * Does nothing while notifications are disabled for the queue.
 */
static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
{
    if (vq->notification) {
        hwaddr event_pa = vq->vring.used +
                          offsetof(VRingUsed, ring[vq->vring.num]);

        virtio_stw_phys(vq->vdev, event_pa, val);
    }
}

A
aliguori 已提交
211 212
/*
 * Enable or disable notifications for @vq.
 *
 * With VIRTIO_RING_F_EVENT_IDX the avail event slot is refreshed with the
 * current avail index; otherwise VRING_USED_F_NO_NOTIFY is cleared or set
 * in the used ring flags.
 */
void virtio_queue_set_notification(VirtQueue *vq, int enable)
{
    vq->notification = enable;

    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vring_avail_idx(vq));
    } else if (!enable) {
        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
    } else {
        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
    }

    if (!enable) {
        return;
    }
    /* Expose avail event/used flags before caller checks the avail idx. */
    smp_mb();
}

/* Return 1 once the queue's avail ring address is non-zero, else 0. */
int virtio_queue_ready(VirtQueue *vq)
{
    return vq->vring.avail ? 1 : 0;
}

232 233
/* Fetch avail_idx from VQ memory only when we really need to know if
 * guest has added some buffers. */
A
aliguori 已提交
234 235
int virtio_queue_empty(VirtQueue *vq)
{
236 237 238 239
    if (vq->shadow_avail_idx != vq->last_avail_idx) {
        return 0;
    }

A
aliguori 已提交
240 241 242
    return vring_avail_idx(vq) == vq->last_avail_idx;
}

243 244
/*
 * Unmap every scatter/gather segment of @elem.
 *
 * For the in (device-writable) segments, @len is spread over the segments
 * in order and passed as the access length; out segments are unmapped
 * with their full length.
 */
static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
                               unsigned int len)
{
    unsigned int done = 0;
    int n;

    for (n = 0; n < elem->in_num; n++) {
        const struct iovec *sg = &elem->in_sg[n];
        size_t written = MIN(len - done, sg->iov_len);

        cpu_physical_memory_unmap(sg->iov_base, sg->iov_len, 1, written);
        done += written;
    }

    for (n = 0; n < elem->out_num; n++) {
        const struct iovec *sg = &elem->out_sg[n];

        cpu_physical_memory_unmap(sg->iov_base, sg->iov_len, 0, sg->iov_len);
    }
}

J
Jason Wang 已提交
266 267 268 269 270 271 272
/*
 * Give back an element obtained from virtqueue_pop() without completing it.
 *
 * Rewinds last_avail_idx by one, drops the element from the in-flight
 * count (virtqueue_pop() incremented vq->inuse for it) and unmaps its
 * buffers; @len is forwarded to virtqueue_unmap_sg().
 */
void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem,
                       unsigned int len)
{
    vq->last_avail_idx--;
    vq->inuse--;
    virtqueue_unmap_sg(vq, elem, len);
}

273 274 275
/*
 * Unmap @elem and record it in the used ring, @idx slots past the current
 * used index (modulo the ring size).  The used index itself is not
 * advanced here; see virtqueue_flush().
 */
void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len, unsigned int idx)
{
    VRingUsedElem used_entry;
    unsigned int slot;

    trace_virtqueue_fill(vq, elem, len, idx);

    virtqueue_unmap_sg(vq, elem, len);

    slot = (idx + vq->used_idx) % vq->vring.num;

    used_entry.id = elem->index;
    used_entry.len = len;
    vring_used_write(vq, &used_entry, slot);
}

/*
 * Publish @count previously filled used-ring entries by advancing the
 * used index, and drop them from the in-flight count.
 */
void virtqueue_flush(VirtQueue *vq, unsigned int count)
{
    uint16_t prev_idx, next_idx;

    /* Make sure buffer is written before we update index. */
    smp_wmb();
    trace_virtqueue_flush(vq, count);

    prev_idx = vq->used_idx;
    next_idx = prev_idx + count;
    vring_used_idx_set(vq, next_idx);
    vq->inuse -= count;

    /* Invalidate signalled_used once the index has moved past it. */
    if (unlikely((int16_t)(next_idx - vq->signalled_used) <
                 (uint16_t)(next_idx - prev_idx))) {
        vq->signalled_used_valid = false;
    }
}

/* Complete a single element: fill it at offset 0, then flush one entry. */
void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len)
{
    const unsigned int first_slot = 0;
    const unsigned int one_entry = 1;

    virtqueue_fill(vq, elem, len, first_slot);
    virtqueue_flush(vq, one_entry);
}

/*
 * Return how many heads the guest has made available beyond @idx.
 * Exits the process if the distance exceeds the ring size.
 */
static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
{
    uint16_t pending = vring_avail_idx(vq) - idx;

    /* Check it isn't doing very strange things with descriptor numbers. */
    if (pending > vq->vring.num) {
        error_report("Guest moved used index from %u to %u",
                     idx, vq->shadow_avail_idx);
        exit(1);
    }

    /* On success, callers read a descriptor at vq->last_avail_idx.
     * Make sure descriptor read does not bypass avail index read. */
    if (pending != 0) {
        smp_rmb();
    }

    return pending;
}

/*
 * Fetch the descriptor head the guest advertises at avail position @idx
 * (taken modulo the ring size).  Exits the process if the head is not a
 * valid descriptor index.
 */
static unsigned int virtqueue_get_head(VirtQueue *vq, unsigned int idx)
{
    unsigned int ring_size = vq->vring.num;
    unsigned int head = vring_avail_ring(vq, idx % ring_size);

    /* If their number is silly, that's a fatal mistake. */
    if (head >= vq->vring.num) {
        error_report("Guest says index %u is available", head);
        exit(1);
    }

    return head;
}

346 347
/*
 * Advance to the descriptor chained after @desc.
 *
 * Returns @max when @desc has no VRING_DESC_F_NEXT flag.  Otherwise loads
 * the next descriptor from the table at @desc_pa into @desc and returns
 * its index; exits the process if that index is not below @max.
 */
static unsigned virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
                                         hwaddr desc_pa, unsigned int max)
{
    unsigned int successor;

    /* If this descriptor says it doesn't chain, we're done. */
    if ((desc->flags & VRING_DESC_F_NEXT) == 0) {
        return max;
    }

    successor = desc->next;
    /* Make sure compiler knows to grab that: we don't want it changing! */
    smp_wmb();

    /* Check they're not leading us off end of descriptors. */
    if (successor >= max) {
        error_report("Desc next is %u", successor);
        exit(1);
    }

    vring_desc_read(vdev, desc, desc_pa, successor);
    return successor;
}

370
/*
 * Sum up the bytes currently offered by the guest in @vq without popping
 * anything.
 *
 * Walks every available descriptor chain starting at last_avail_idx and
 * accumulates the lengths of device-writable descriptors into *@in_bytes
 * and of the others into *@out_bytes (either pointer may be NULL).  The
 * walk stops early once both @max_in_bytes and @max_out_bytes have been
 * reached.  Malformed rings terminate the process.
 */
void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
                               unsigned int *out_bytes,
                               unsigned max_in_bytes, unsigned max_out_bytes)
{
    unsigned int idx;
    unsigned int total_bufs, in_total, out_total;

    idx = vq->last_avail_idx;

    total_bufs = in_total = out_total = 0;
    while (virtqueue_num_heads(vq, idx)) {
        VirtIODevice *vdev = vq->vdev;
        unsigned int max, num_bufs, indirect = 0;
        VRingDesc desc;
        hwaddr desc_pa;
        int i;

        max = vq->vring.num;
        num_bufs = total_bufs;
        i = virtqueue_get_head(vq, idx++);
        desc_pa = vq->vring.desc;
        vring_desc_read(vdev, &desc, desc_pa, i);

        if (desc.flags & VRING_DESC_F_INDIRECT) {
            /* An empty table has no entry 0 to read, so reject it along
             * with sizes that are not a whole number of descriptors. */
            if (!desc.len || (desc.len % sizeof(VRingDesc))) {
                error_report("Invalid size for indirect buffer table");
                exit(1);
            }

            /* If we've got too many, that implies a descriptor loop. */
            if (num_bufs >= max) {
                error_report("Looped descriptor");
                exit(1);
            }

            /* loop over the indirect descriptor table */
            indirect = 1;
            max = desc.len / sizeof(VRingDesc);
            desc_pa = desc.addr;
            num_bufs = i = 0;
            vring_desc_read(vdev, &desc, desc_pa, i);
        }

        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > max) {
                error_report("Looped descriptor");
                exit(1);
            }

            if (desc.flags & VRING_DESC_F_WRITE) {
                in_total += desc.len;
            } else {
                out_total += desc.len;
            }
            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
                goto done;
            }
        } while ((i = virtqueue_read_next_desc(vdev, &desc, desc_pa, max)) != max);

        /* An indirect table counts as a single buffer of the main ring. */
        if (!indirect) {
            total_bufs = num_bufs;
        } else {
            total_bufs++;
        }
    }
done:
    if (in_bytes) {
        *in_bytes = in_total;
    }
    if (out_bytes) {
        *out_bytes = out_total;
    }
}
A
aliguori 已提交
443

444 445 446 447 448
/*
 * Return non-zero when the guest currently offers at least @in_bytes of
 * device-writable space and at least @out_bytes of device-readable data.
 */
int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
                          unsigned int out_bytes)
{
    unsigned int have_in, have_out;

    virtqueue_get_avail_bytes(vq, &have_in, &have_out, in_bytes, out_bytes);

    if (have_in < in_bytes) {
        return 0;
    }
    return have_out >= out_bytes;
}

453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478
/*
 * Map the guest range [@pa, @pa + @sz) and append the resulting segments
 * to @iov / @addr, starting at slot *@p_num_sg.
 *
 * A range may need several segments when one mapping call covers fewer
 * bytes than requested.  *@p_num_sg is updated to the new segment count.
 * Running out of slots (@max_num_sg) or a failed mapping terminates the
 * process.
 */
static void virtqueue_map_desc(unsigned int *p_num_sg, hwaddr *addr, struct iovec *iov,
                               unsigned int max_num_sg, bool is_write,
                               hwaddr pa, size_t sz)
{
    unsigned num_sg = *p_num_sg;
    assert(num_sg <= max_num_sg);

    while (sz) {
        hwaddr len = sz;

        if (num_sg == max_num_sg) {
            error_report("virtio: too many write descriptors in indirect table");
            exit(1);
        }

        iov[num_sg].iov_base = cpu_physical_memory_map(pa, &len, is_write);
        /* Never hand a failed mapping on to the device model. */
        if (!iov[num_sg].iov_base) {
            error_report("virtio: bogus descriptor or out of resources");
            exit(1);
        }
        iov[num_sg].iov_len = len;
        addr[num_sg] = pa;

        sz -= len;
        pa += len;
        num_sg++;
    }
    *p_num_sg = num_sg;
}

M
Michael S. Tsirkin 已提交
479 480 481
/*
 * Map each of the *@num_sg guest ranges (@addr[i], @sg[i].iov_len) and
 * store the host pointer in @sg[i].iov_base.  A failed or short mapping
 * terminates the process.
 */
static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr,
                                unsigned int *num_sg, unsigned int max_size,
                                int is_write)
{
    unsigned int n;

    /* Note: this function MUST validate input, some callers
     * are passing in num_sg values received over the network.
     */
    /* TODO: teach all callers that this can fail, and return failure instead
     * of asserting here.
     * When we do, we might be able to re-enable NDEBUG below.
     */
#ifdef NDEBUG
#error building with NDEBUG is not supported
#endif
    assert(*num_sg <= max_size);

    for (n = 0; n < *num_sg; n++) {
        struct iovec *seg = &sg[n];
        hwaddr mapped = seg->iov_len;

        seg->iov_base = cpu_physical_memory_map(addr[n], &mapped, is_write);
        if (seg->iov_base == NULL) {
            error_report("virtio: error trying to map MMIO memory");
            exit(1);
        }
        if (mapped != seg->iov_len) {
            error_report("virtio: unexpected memory split");
            exit(1);
        }
    }
}

M
Michael S. Tsirkin 已提交
512 513 514
/* Map the in segments (as writable) and then the out segments of @elem. */
void virtqueue_map(VirtQueueElement *elem)
{
    const int writable = 1;
    const int readonly = 0;

    virtqueue_map_iovec(elem->in_sg, elem->in_addr, &elem->in_num,
                        VIRTQUEUE_MAX_SIZE, writable);
    virtqueue_map_iovec(elem->out_sg, elem->out_addr, &elem->out_num,
                        VIRTQUEUE_MAX_SIZE, readonly);
}

/*
 * Allocate one block holding a caller structure of @sz bytes (which must
 * begin with a VirtQueueElement) followed by the four arrays in_addr,
 * out_addr, in_sg and out_sg sized for @in_num / @out_num entries.
 *
 * Only the counts and the four array pointers are initialised.  The block
 * comes from g_malloc().
 */
void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
{
    VirtQueueElement *elem;
    size_t ofs_in_addr, ofs_out_addr, end_addrs;
    size_t ofs_in_sg, ofs_out_sg, total;

    /* Address arrays first: in, then out. */
    ofs_in_addr = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
    ofs_out_addr = ofs_in_addr + in_num * sizeof(elem->in_addr[0]);
    end_addrs = ofs_out_addr + out_num * sizeof(elem->out_addr[0]);

    /* Then the iovec arrays: in, then out. */
    ofs_in_sg = QEMU_ALIGN_UP(end_addrs, __alignof__(elem->in_sg[0]));
    ofs_out_sg = ofs_in_sg + in_num * sizeof(elem->in_sg[0]);
    total = ofs_out_sg + out_num * sizeof(elem->out_sg[0]);

    assert(sz >= sizeof(VirtQueueElement));
    elem = g_malloc(total);

    elem->out_num = out_num;
    elem->in_num = in_num;
    elem->in_addr = (void *)elem + ofs_in_addr;
    elem->out_addr = (void *)elem + ofs_out_addr;
    elem->in_sg = (void *)elem + ofs_in_sg;
    elem->out_sg = (void *)elem + ofs_out_sg;
    return elem;
}

541
/*
 * Pop the next available element from @vq.
 *
 * Returns NULL when the queue is empty.  Otherwise walks the descriptor
 * chain (directly or through an indirect table), maps every buffer and
 * returns a newly allocated structure of @sz bytes that starts with a
 * VirtQueueElement (see virtqueue_alloc_element()).  Out segments must
 * precede in segments.  Malformed rings terminate the process.
 */
void *virtqueue_pop(VirtQueue *vq, size_t sz)
{
    unsigned int i, head, max;
    hwaddr desc_pa = vq->vring.desc;
    VirtIODevice *vdev = vq->vdev;
    VirtQueueElement *elem;
    unsigned out_num, in_num;
    hwaddr addr[VIRTQUEUE_MAX_SIZE];
    struct iovec iov[VIRTQUEUE_MAX_SIZE];
    VRingDesc desc;

    if (virtio_queue_empty(vq)) {
        return NULL;
    }
    /* Needed after virtio_queue_empty(), see comment in
     * virtqueue_num_heads(). */
    smp_rmb();

    /* When we start there are none of either input nor output. */
    out_num = in_num = 0;

    max = vq->vring.num;

    i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vq->last_avail_idx);
    }

    vring_desc_read(vdev, &desc, desc_pa, i);
    if (desc.flags & VRING_DESC_F_INDIRECT) {
        /* An empty table has no entry 0 to read, so reject it along with
         * sizes that are not a whole number of descriptors. */
        if (!desc.len || (desc.len % sizeof(VRingDesc))) {
            error_report("Invalid size for indirect buffer table");
            exit(1);
        }

        /* loop over the indirect descriptor table */
        max = desc.len / sizeof(VRingDesc);
        desc_pa = desc.addr;
        i = 0;
        vring_desc_read(vdev, &desc, desc_pa, i);
    }

    /* Collect all the descriptors */
    do {
        if (desc.flags & VRING_DESC_F_WRITE) {
            /* In segments are stored after the out segments. */
            virtqueue_map_desc(&in_num, addr + out_num, iov + out_num,
                               VIRTQUEUE_MAX_SIZE - out_num, true, desc.addr, desc.len);
        } else {
            if (in_num) {
                error_report("Incorrect order for descriptors");
                exit(1);
            }
            virtqueue_map_desc(&out_num, addr, iov,
                               VIRTQUEUE_MAX_SIZE, false, desc.addr, desc.len);
        }

        /* If we've got too many, that implies a descriptor loop. */
        if ((in_num + out_num) > max) {
            error_report("Looped descriptor");
            exit(1);
        }
    } while ((i = virtqueue_read_next_desc(vdev, &desc, desc_pa, max)) != max);

    /* Now copy what we have collected and mapped */
    elem = virtqueue_alloc_element(sz, out_num, in_num);
    elem->index = head;
    for (i = 0; i < out_num; i++) {
        elem->out_addr[i] = addr[i];
        elem->out_sg[i] = iov[i];
    }
    for (i = 0; i < in_num; i++) {
        elem->in_addr[i] = addr[out_num + i];
        elem->in_sg[i] = iov[out_num + i];
    }

    vq->inuse++;

    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
    return elem;
}

622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638
/* Reading and writing a structure directly to QEMUFile is *awful*, but
 * it is what QEMU has always done by mistake.  We can change it sooner
 * or later by bumping the version number of the affected vm states.
 * In the meanwhile, since the in-memory layout of VirtQueueElement
 * has changed, we need to marshal to and from the layout that was
 * used before the change.
 */
/* Fixed-size legacy layout, read and written as a raw buffer by
 * qemu_get_virtqueue_element() / qemu_put_virtqueue_element().
 * Field order and sizes must not change. */
typedef struct VirtQueueElementOld {
    unsigned int index;
    unsigned int out_num;   /* number of valid out_addr/out_sg entries */
    unsigned int in_num;    /* number of valid in_addr/in_sg entries */
    hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
    hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
    /* Only iov_len of the iovecs is saved and restored. */
    struct iovec in_sg[VIRTQUEUE_MAX_SIZE];
    struct iovec out_sg[VIRTQUEUE_MAX_SIZE];
} VirtQueueElementOld;

639 640
/*
 * Load one element in the legacy VirtQueueElementOld layout from @f.
 *
 * Returns a newly allocated structure of @sz bytes starting with a
 * VirtQueueElement (see virtqueue_alloc_element()) whose buffers have
 * been re-mapped by virtqueue_map().
 */
void *qemu_get_virtqueue_element(QEMUFile *f, size_t sz)
{
    VirtQueueElement *elem;
    VirtQueueElementOld data;
    int i;

    qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));

    /* The counts come from the migration stream and index the fixed-size
     * arrays of data below, so they must be validated before any copy.
     * TODO: teach callers that this can fail instead of asserting. */
    assert(data.in_num <= VIRTQUEUE_MAX_SIZE);
    assert(data.out_num <= VIRTQUEUE_MAX_SIZE);

    elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
    elem->index = data.index;

    for (i = 0; i < elem->in_num; i++) {
        elem->in_addr[i] = data.in_addr[i];
    }

    for (i = 0; i < elem->out_num; i++) {
        elem->out_addr[i] = data.out_addr[i];
    }

    for (i = 0; i < elem->in_num; i++) {
        /* Base is overwritten by virtqueue_map.  */
        elem->in_sg[i].iov_base = 0;
        elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
    }

    for (i = 0; i < elem->out_num; i++) {
        /* Base is overwritten by virtqueue_map.  */
        elem->out_sg[i].iov_base = 0;
        elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
    }

    virtqueue_map(elem);
    return elem;
}

/*
 * Save @elem to @f in the legacy VirtQueueElementOld layout.
 *
 * The buffer is zeroed first and iov_base is never copied: it is
 * overwritten by virtqueue_map when loading, and saving it would leak
 * the QEMU address space layout.
 */
void qemu_put_virtqueue_element(QEMUFile *f, VirtQueueElement *elem)
{
    VirtQueueElementOld legacy;
    int n;

    memset(&legacy, 0, sizeof(legacy));
    legacy.index = elem->index;
    legacy.in_num = elem->in_num;
    legacy.out_num = elem->out_num;

    for (n = 0; n < elem->in_num; n++) {
        legacy.in_addr[n] = elem->in_addr[n];
        legacy.in_sg[n].iov_len = elem->in_sg[n].iov_len;
    }

    for (n = 0; n < elem->out_num; n++) {
        legacy.out_addr[n] = elem->out_addr[n];
        legacy.out_sg[n].iov_len = elem->out_sg[n].iov_len;
    }

    qemu_put_buffer(f, (uint8_t *)&legacy, sizeof(VirtQueueElementOld));
}

A
aliguori 已提交
705
/* virtio device */
706 707
/*
 * Forward a notification for @vector to the parent bus class, if that
 * class provides a notify hook.
 */
static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
{
    BusState *bus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *bus_class = VIRTIO_BUS_GET_CLASS(bus);

    if (!bus_class->notify) {
        return;
    }
    bus_class->notify(bus->parent, vector);
}
A
aliguori 已提交
715

P
Paul Brook 已提交
716
/* Notify the transport with VIRTIO_NO_VECTOR. */
void virtio_update_irq(VirtIODevice *vdev)
{
    const uint16_t vector = VIRTIO_NO_VECTOR;

    virtio_notify_vector(vdev, vector);
}

721 722 723 724 725 726 727 728 729 730 731 732
/*
 * Run the device class's validate_features hook.
 * Returns the hook's result, or 0 when the class has no such hook.
 */
static int virtio_validate_features(VirtIODevice *vdev)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    if (!k->validate_features) {
        return 0;
    }
    return k->validate_features(vdev);
}

/*
 * Set the device status to @val.
 *
 * For VIRTIO_F_VERSION_1 devices, the features are validated when
 * VIRTIO_CONFIG_S_FEATURES_OK goes from clear to set; a non-zero
 * validation result is returned and the status is left unchanged.
 * Otherwise the class set_status hook (if any) is called, the new status
 * is stored, and 0 is returned.
 */
int virtio_set_status(VirtIODevice *vdev, uint8_t val)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);

    trace_virtio_set_status(vdev, val);

    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1) &&
        !(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
        (val & VIRTIO_CONFIG_S_FEATURES_OK)) {
        int err = virtio_validate_features(vdev);

        if (err) {
            return err;
        }
    }

    if (k->set_status) {
        k->set_status(vdev, val);
    }
    vdev->status = val;
    return 0;
}

754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774
bool target_words_bigendian(void);

/* Pick the device endianness from target_words_bigendian(). */
static enum virtio_device_endian virtio_default_endian(void)
{
    return target_words_bigendian() ? VIRTIO_DEVICE_ENDIAN_BIG
                                    : VIRTIO_DEVICE_ENDIAN_LITTLE;
}

/*
 * Pick the device endianness by asking the class of current_cpu through
 * its virtio_is_big_endian hook.
 */
static enum virtio_device_endian virtio_current_cpu_endian(void)
{
    CPUClass *cpu_class = CPU_GET_CLASS(current_cpu);

    return cpu_class->virtio_is_big_endian(current_cpu)
           ? VIRTIO_DEVICE_ENDIAN_BIG
           : VIRTIO_DEVICE_ENDIAN_LITTLE;
}

P
Paul Brook 已提交
775
void virtio_reset(void *opaque)
A
aliguori 已提交
776 777
{
    VirtIODevice *vdev = opaque;
778
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
A
aliguori 已提交
779 780
    int i;

781
    virtio_set_status(vdev, 0);
782 783 784 785 786 787 788
    if (current_cpu) {
        /* Guest initiated reset */
        vdev->device_endian = virtio_current_cpu_endian();
    } else {
        /* System reset */
        vdev->device_endian = virtio_default_endian();
    }
789

790 791 792
    if (k->reset) {
        k->reset(vdev);
    }
A
aliguori 已提交
793

794
    vdev->guest_features = 0;
A
aliguori 已提交
795 796 797
    vdev->queue_sel = 0;
    vdev->status = 0;
    vdev->isr = 0;
798 799
    vdev->config_vector = VIRTIO_NO_VECTOR;
    virtio_notify_vector(vdev, vdev->config_vector);
A
aliguori 已提交
800

801
    for(i = 0; i < VIRTIO_QUEUE_MAX; i++) {
A
aliguori 已提交
802 803 804 805
        vdev->vq[i].vring.desc = 0;
        vdev->vq[i].vring.avail = 0;
        vdev->vq[i].vring.used = 0;
        vdev->vq[i].last_avail_idx = 0;
806
        vdev->vq[i].shadow_avail_idx = 0;
807
        vdev->vq[i].used_idx = 0;
808
        virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
M
Michael S. Tsirkin 已提交
809 810 811
        vdev->vq[i].signalled_used = 0;
        vdev->vq[i].signalled_used_valid = false;
        vdev->vq[i].notification = true;
C
Cornelia Huck 已提交
812
        vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
A
aliguori 已提交
813 814 815
    }
}

P
Paul Brook 已提交
816
/*
 * Read one byte of device config space at offset @addr.
 *
 * Returns (uint32_t)-1 when the access does not fit inside the
 * config_len bytes of config space.  Otherwise vdev->config is refreshed
 * through the class get_config hook and the byte is returned.
 */
uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val;

    /* Widen before adding so the sum cannot wrap where size_t is 32 bits. */
    if ((uint64_t)addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldub_p(vdev->config + addr);
    return val;
}

P
Paul Brook 已提交
831
/*
 * Read a 16-bit value of device config space at offset @addr (lduw_p).
 *
 * Returns (uint32_t)-1 when the access does not fit inside the
 * config_len bytes of config space.  Otherwise vdev->config is refreshed
 * through the class get_config hook and the value is returned.
 */
uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val;

    /* Widen before adding so the sum cannot wrap where size_t is 32 bits. */
    if ((uint64_t)addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = lduw_p(vdev->config + addr);
    return val;
}

P
Paul Brook 已提交
846
/*
 * Read a 32-bit value of device config space at offset @addr (ldl_p).
 *
 * Returns (uint32_t)-1 when the access does not fit inside the
 * config_len bytes of config space.  Otherwise vdev->config is refreshed
 * through the class get_config hook and the value is returned.
 */
uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val;

    /* Widen before adding so the sum cannot wrap where size_t is 32 bits. */
    if ((uint64_t)addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldl_p(vdev->config + addr);
    return val;
}

P
Paul Brook 已提交
861
/*
 * Write the low byte of @data to device config space at offset @addr.
 *
 * Accesses that do not fit inside the config_len bytes of config space
 * are silently ignored.  After the store, the class set_config hook is
 * called if the class provides one.
 */
void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val = data;

    /* Widen before adding so the sum cannot wrap where size_t is 32 bits. */
    if ((uint64_t)addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stb_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

P
Paul Brook 已提交
877
/*
 * Write the low 16 bits of @data to device config space at offset @addr
 * (stw_p).
 *
 * Accesses that do not fit inside the config_len bytes of config space
 * are silently ignored.  After the store, the class set_config hook is
 * called if the class provides one.
 */
void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val = data;

    /* Widen before adding so the sum cannot wrap where size_t is 32 bits. */
    if ((uint64_t)addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stw_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

P
Paul Brook 已提交
893
/*
 * Store the 32-bit @data into the device config space at byte offset
 * @addr with stl_p(), then pass the updated buffer to the device class's
 * set_config hook if one is provided.
 *
 * Accesses that would run past vdev->config_len are silently ignored.
 */
void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val = data;

    /*
     * Written as a subtraction so the test cannot wrap: addr + sizeof(val)
     * overflows for addr near UINT32_MAX when size_t is only 32 bits wide.
     */
    if (vdev->config_len < sizeof(val) ||
        addr > vdev->config_len - sizeof(val)) {
        return;
    }

    stl_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004
/*
 * "Modern" 8-bit read from the device config space at byte offset @addr.
 *
 * Returns (uint32_t)-1 if the access would run past vdev->config_len.
 * Otherwise the device class's get_config hook refreshes vdev->config and
 * the byte is fetched with ldub_p().
 */
uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val;

    /*
     * Written as a subtraction so the test cannot wrap: addr + sizeof(val)
     * overflows for addr near UINT32_MAX when size_t is only 32 bits wide.
     */
    if (vdev->config_len < sizeof(val) ||
        addr > vdev->config_len - sizeof(val)) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldub_p(vdev->config + addr);
    return val;
}

/*
 * "Modern" 16-bit read from the device config space at byte offset @addr.
 *
 * Returns (uint32_t)-1 if the access would run past vdev->config_len.
 * Otherwise the device class's get_config hook refreshes vdev->config and
 * the value is fetched with lduw_le_p().
 */
uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val;

    /*
     * Written as a subtraction so the test cannot wrap: addr + sizeof(val)
     * overflows for addr near UINT32_MAX when size_t is only 32 bits wide.
     */
    if (vdev->config_len < sizeof(val) ||
        addr > vdev->config_len - sizeof(val)) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = lduw_le_p(vdev->config + addr);
    return val;
}

/*
 * "Modern" 32-bit read from the device config space at byte offset @addr.
 *
 * Returns (uint32_t)-1 if the access would run past vdev->config_len.
 * Otherwise the device class's get_config hook refreshes vdev->config and
 * the value is fetched with ldl_le_p().
 */
uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val;

    /*
     * Written as a subtraction so the test cannot wrap: addr + sizeof(val)
     * overflows for addr near UINT32_MAX when size_t is only 32 bits wide.
     */
    if (vdev->config_len < sizeof(val) ||
        addr > vdev->config_len - sizeof(val)) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldl_le_p(vdev->config + addr);
    return val;
}

/*
 * "Modern" 8-bit write: store the low 8 bits of @data at byte offset @addr
 * of the device config space with stb_p(), then pass the updated buffer to
 * the device class's set_config hook if one is provided.
 *
 * Accesses that would run past vdev->config_len are silently ignored.
 */
void virtio_config_modern_writeb(VirtIODevice *vdev,
                                 uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val = data;

    /*
     * Written as a subtraction so the test cannot wrap: addr + sizeof(val)
     * overflows for addr near UINT32_MAX when size_t is only 32 bits wide.
     */
    if (vdev->config_len < sizeof(val) ||
        addr > vdev->config_len - sizeof(val)) {
        return;
    }

    stb_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

/*
 * "Modern" 16-bit write: store the low 16 bits of @data at byte offset
 * @addr of the device config space with stw_le_p(), then pass the updated
 * buffer to the device class's set_config hook if one is provided.
 *
 * Accesses that would run past vdev->config_len are silently ignored.
 */
void virtio_config_modern_writew(VirtIODevice *vdev,
                                 uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val = data;

    /*
     * Written as a subtraction so the test cannot wrap: addr + sizeof(val)
     * overflows for addr near UINT32_MAX when size_t is only 32 bits wide.
     */
    if (vdev->config_len < sizeof(val) ||
        addr > vdev->config_len - sizeof(val)) {
        return;
    }

    stw_le_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

/*
 * "Modern" 32-bit write: store @data at byte offset @addr of the device
 * config space with stl_le_p(), then pass the updated buffer to the device
 * class's set_config hook if one is provided.
 *
 * Accesses that would run past vdev->config_len are silently ignored.
 */
void virtio_config_modern_writel(VirtIODevice *vdev,
                                 uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val = data;

    /*
     * Written as a subtraction so the test cannot wrap: addr + sizeof(val)
     * overflows for addr near UINT32_MAX when size_t is only 32 bits wide.
     */
    if (vdev->config_len < sizeof(val) ||
        addr > vdev->config_len - sizeof(val)) {
        return;
    }

    stl_le_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

A
Avi Kivity 已提交
1005
/*
 * Record @addr as the descriptor table address of queue @n and let
 * virtio_queue_update_rings() recompute the remaining ring addresses.
 */
void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
{
    VirtQueue *queue = vdev->vq + n;

    queue->vring.desc = addr;
    virtio_queue_update_rings(vdev, n);
}

A
Avi Kivity 已提交
1011
/* Return the descriptor table address currently stored for queue @n. */
hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
{
    VirtQueue *queue = vdev->vq + n;

    return queue->vring.desc;
}

/*
 * Set the three ring addresses of queue @n individually.  The values are
 * stored as given; nothing is derived from them here.
 */
void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
                            hwaddr avail, hwaddr used)
{
    VRing *ring = &vdev->vq[n].vring;

    ring->desc = desc;
    ring->avail = avail;
    ring->used = used;
}

1024 1025
/*
 * Change the size of queue @n to @num.
 *
 * The request is ignored when it would flip the queue between existent
 * (non-zero size) and nonexistent (zero size), or when @num is negative or
 * larger than VIRTQUEUE_MAX_SIZE.
 */
void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
{
    VRing *ring = &vdev->vq[n].vring;
    bool exists_now = ring->num != 0;
    bool exists_after = num != 0;

    if (exists_after != exists_now) {
        return;
    }
    if (num > VIRTQUEUE_MAX_SIZE || num < 0) {
        return;
    }

    ring->num = num;
}

1037 1038 1039 1040 1041 1042 1043 1044 1045 1046
/* Return the head of the queue list kept for interrupt vector @vector. */
VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
{
    VirtQueue *head = QLIST_FIRST(&vdev->vector_queues[vector]);

    return head;
}

/* Return the list successor of @vq, following its 'node' link. */
VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
{
    VirtQueue *successor = QLIST_NEXT(vq, node);

    return successor;
}

P
Paul Brook 已提交
1047 1048 1049 1050
/* Return the current size (vring.num) of queue @n. */
int virtio_queue_get_num(VirtIODevice *vdev, int n)
{
    VirtQueue *queue = vdev->vq + n;

    return queue->vring.num;
}
A
aliguori 已提交
1051

1052 1053 1054 1055
/*
 * Count the leading queues that have a non-zero size: the result is the
 * index of the first queue whose size is zero, or VIRTIO_QUEUE_MAX if there
 * is none.
 */
int virtio_get_num_queues(VirtIODevice *vdev)
{
    int count = 0;

    while (count < VIRTIO_QUEUE_MAX && virtio_queue_get_num(vdev, count)) {
        count++;
    }

    return count;
}

P
Paolo Bonzini 已提交
1065 1066 1067
/*
 * Return the index of @vq within its owning device's queue array.
 * Asserts that @vq really points into that array.
 */
int virtio_queue_get_id(VirtQueue *vq)
{
    VirtQueue *base = &vq->vdev->vq[0];
    VirtQueue *limit = &vq->vdev->vq[VIRTIO_QUEUE_MAX];

    assert(vq >= base && vq < limit);
    return vq - base;
}

1072 1073 1074 1075 1076
/*
 * Set the vring alignment of queue @n and recompute its ring addresses.
 *
 * Refused (with an error report) once VIRTIO_F_VERSION_1 is set on the
 * device: virtio-1 compliant devices cannot change the alignment.
 */
void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
{
    BusState *bus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *bus_class = VIRTIO_BUS_GET_CLASS(bus);
    VirtQueue *queue;

    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        error_report("tried to modify queue alignment for virtio-1 device");
        return;
    }

    /*
     * The transport must have announced that it varies the alignment, so
     * that a buggy transport asserts right away instead of silently failing
     * to migrate this state.
     */
    assert(bus_class->has_variable_vring_alignment);

    queue = &vdev->vq[n];
    queue->vring.align = align;
    virtio_queue_update_rings(vdev, n);
}

M
Michael S. Tsirkin 已提交
1092 1093 1094 1095 1096 1097 1098 1099 1100 1101
/*
 * Run the AIO output handler of @vq.  Does nothing when the queue has no
 * descriptor table address or no AIO handler installed.
 */
static void virtio_queue_notify_aio_vq(VirtQueue *vq)
{
    VirtIODevice *owner;

    if (!vq->vring.desc || !vq->handle_aio_output) {
        return;
    }

    owner = vq->vdev;
    trace_virtio_queue_notify(owner, vq - owner->vq, vq);
    vq->handle_aio_output(owner, vq);
}

1102
/*
 * Run the output handler of @vq.  Does nothing when the queue has no
 * descriptor table address or no handler installed.
 */
static void virtio_queue_notify_vq(VirtQueue *vq)
{
    VirtIODevice *owner;

    if (!vq->vring.desc || !vq->handle_output) {
        return;
    }

    owner = vq->vdev;
    trace_virtio_queue_notify(owner, vq - owner->vq, vq);
    vq->handle_output(owner, vq);
}

P
Paul Brook 已提交
1112 1113
/* Notify queue number @n of @vdev, i.e. run its output handler if any. */
void virtio_queue_notify(VirtIODevice *vdev, int n)
{
    VirtQueue *queue = &vdev->vq[n];

    virtio_queue_notify_vq(queue);
}

1117 1118
/*
 * Return the interrupt vector assigned to queue @n, or VIRTIO_NO_VECTOR
 * when @n is not a valid queue index.
 */
uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
{
    /* @n is signed: reject negative indices as well as too-large ones. */
    if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
        return VIRTIO_NO_VECTOR;
    }

    return vdev->vq[n].vector;
}

/*
 * Assign interrupt vector @vector to queue @n.
 *
 * When the device keeps per-vector queue lists (vdev->vector_queues is
 * set), the queue is unlinked from the list of its previous vector and
 * linked at the head of the list for @vector; VIRTIO_NO_VECTOR has no list.
 * An out-of-range @n is ignored.
 */
void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
{
    VirtQueue *vq;

    /*
     * Validate the index before forming a pointer into the queue array,
     * and reject negative values too since @n is signed.
     */
    if (n < 0 || n >= VIRTIO_QUEUE_MAX) {
        return;
    }
    vq = &vdev->vq[n];

    if (vdev->vector_queues &&
        vq->vector != VIRTIO_NO_VECTOR) {
        QLIST_REMOVE(vq, node);
    }
    vq->vector = vector;
    if (vdev->vector_queues &&
        vector != VIRTIO_NO_VECTOR) {
        QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
    }
}

A
aliguori 已提交
1140 1141 1142 1143 1144
/*
 * Claim the first unused queue slot (size 0) of @vdev and set it up with
 * @queue_size entries, the default PCI vring alignment and @handle_output
 * as its output handler.  The AIO handler starts out unset.
 *
 * Aborts when no slot is free or @queue_size is outside
 * [0, VIRTQUEUE_MAX_SIZE].  Returns the queue that was set up.
 */
VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                            void (*handle_output)(VirtIODevice *, VirtQueue *))
{
    int i;

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0) {
            break;
        }
    }

    /*
     * vring.num is unsigned, so a negative size would otherwise slip past
     * the upper-bound test and be stored as a huge value.
     */
    if (i == VIRTIO_QUEUE_MAX || queue_size < 0 ||
        queue_size > VIRTQUEUE_MAX_SIZE) {
        abort();
    }

    vdev->vq[i].vring.num = queue_size;
    vdev->vq[i].vring.num_default = queue_size;
    vdev->vq[i].vring.align = VIRTIO_PCI_VRING_ALIGN;
    vdev->vq[i].handle_output = handle_output;
    vdev->vq[i].handle_aio_output = NULL;

    return &vdev->vq[i];
}

M
Michael S. Tsirkin 已提交
1162 1163 1164 1165 1166 1167 1168 1169
/*
 * Install @handle_output as the AIO output handler of @vq.  The queue must
 * already have a regular output handler.
 */
void virtio_set_queue_aio(VirtQueue *vq,
                          void (*handle_output)(VirtIODevice *, VirtQueue *))
{
    assert(vq->handle_output);
    vq->handle_aio_output = handle_output;
}

1170 1171
/*
 * Mark queue @n as unused by zeroing both its current and its default size.
 * Aborts when @n is not a valid queue index.
 */
void virtio_del_queue(VirtIODevice *vdev, int n)
{
    VRing *ring;

    if (!(n >= 0 && n < VIRTIO_QUEUE_MAX)) {
        abort();
    }

    ring = &vdev->vq[n].vring;
    ring->num = 0;
    ring->num_default = 0;
}

1180 1181
/*
 * Raise the queue interrupt for @vq: set bit 0 of the owning device's ISR
 * and notify the vector assigned to the queue.
 */
void virtio_irq(VirtQueue *vq)
{
    VirtIODevice *owner;

    trace_virtio_irq(vq);

    owner = vq->vdev;
    owner->isr |= 0x01;
    virtio_notify_vector(owner, vq->vector);
}

1187
/*
 * Decide whether the guest has to be interrupted for @vq.
 *
 * Besides returning the verdict, the EVENT_IDX path records vq->used_idx in
 * vq->signalled_used and marks it valid.
 */
bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    uint16_t prev_signalled, now_used;
    bool was_valid;

    /* We need to expose used array entries before checking used event. */
    smp_mb();

    /* Always notify when queue is empty (when feature acknowledge) */
    if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
        !vq->inuse && virtio_queue_empty(vq)) {
        return true;
    }

    /* Without EVENT_IDX the avail ring's no-interrupt flag decides. */
    if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        return (vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT) == 0;
    }

    was_valid = vq->signalled_used_valid;
    prev_signalled = vq->signalled_used;
    now_used = vq->used_idx;

    vq->signalled_used_valid = true;
    vq->signalled_used = now_used;

    /* The used event is only read when the previous value was valid. */
    if (!was_valid) {
        return true;
    }
    return vring_need_event(vring_get_used_event(vq), now_used,
                            prev_signalled);
}

/*
 * Interrupt the guest for @vq if virtio_should_notify() says so: set bit 0
 * of the ISR and notify the vector assigned to the queue.
 */
void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    if (virtio_should_notify(vdev, vq)) {
        trace_virtio_notify(vdev, vq);
        vdev->isr |= 0x01;
        virtio_notify_vector(vdev, vq->vector);
    }
}

/*
 * Signal a configuration change: set ISR bits 0x03, bump the generation
 * counter and notify the config vector.  Nothing happens unless the
 * DRIVER_OK status bit is set.
 */
void virtio_notify_config(VirtIODevice *vdev)
{
    if (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) {
        vdev->isr |= 0x03;
        vdev->generation++;
        virtio_notify_vector(vdev, vdev->config_vector);
    }
}

1231 1232 1233 1234 1235
static bool virtio_device_endian_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
1236
    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1237 1238 1239 1240
        return vdev->device_endian != virtio_default_endian();
    }
    /* Devices conforming to VIRTIO 1.0 or later are always LE. */
    return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
1241 1242
}

G
Gerd Hoffmann 已提交
1243 1244 1245 1246 1247 1248 1249
/* True when any of the upper 32 host feature bits is set. */
static bool virtio_64bit_features_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;
    uint64_t upper_bits = vdev->host_features >> 32;

    return upper_bits != 0;
}

J
Jason Wang 已提交
1250 1251 1252 1253 1254 1255 1256
/*
 * .needed callback of the "virtio/virtqueues" subsection: true when the
 * host offers VIRTIO_F_VERSION_1.
 */
static bool virtio_virtqueue_needed(void *opaque)
{
    VirtIODevice *device = opaque;
    bool offers_v1 = virtio_host_has_feature(device, VIRTIO_F_VERSION_1);

    return offers_v1;
}

C
Cornelia Huck 已提交
1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269
static bool virtio_ringsize_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;
    int i;

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
            return true;
        }
    }
    return false;
}

1270 1271 1272 1273 1274 1275 1276 1277 1278 1279
/*
 * .needed callback of the "virtio/extra_state" subsection: true when the
 * bus class implements has_extra_state and it reports true for the bus's
 * parent device.
 */
static bool virtio_extra_state_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;
    BusState *bus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *bus_class = VIRTIO_BUS_GET_CLASS(bus);

    if (!bus_class->has_extra_state) {
        return false;
    }
    return bus_class->has_extra_state(bus->parent);
}

1280
static const VMStateDescription vmstate_virtqueue = {
J
Jason Wang 已提交
1281
    .name = "virtqueue_state",
1282 1283 1284 1285 1286 1287 1288
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64(vring.avail, struct VirtQueue),
        VMSTATE_UINT64(vring.used, struct VirtQueue),
        VMSTATE_END_OF_LIST()
    }
J
Jason Wang 已提交
1289 1290 1291 1292 1293 1294 1295 1296
};

/*
 * Subsection carrying one vmstate_virtqueue record for each of the
 * VIRTIO_QUEUE_MAX queues; emitted only when virtio_virtqueue_needed().
 */
static const VMStateDescription vmstate_virtio_virtqueues = {
    .name               = "virtio/virtqueues",
    .version_id         = 1,
    .minimum_version_id = 1,
    .needed             = &virtio_virtqueue_needed,
    .fields             = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
                      VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

1303
static const VMStateDescription vmstate_ringsize = {
C
Cornelia Huck 已提交
1304
    .name = "ringsize_state",
1305 1306 1307 1308 1309 1310
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(vring.num_default, struct VirtQueue),
        VMSTATE_END_OF_LIST()
    }
C
Cornelia Huck 已提交
1311 1312 1313 1314 1315 1316 1317 1318
};

/*
 * Subsection carrying one vmstate_ringsize record for each of the
 * VIRTIO_QUEUE_MAX queues; emitted only when virtio_ringsize_needed().
 */
static const VMStateDescription vmstate_virtio_ringsize = {
    .name               = "virtio/ringsize",
    .version_id         = 1,
    .minimum_version_id = 1,
    .needed             = &virtio_ringsize_needed,
    .fields             = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, struct VirtIODevice,
                      VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371
/*
 * VMStateInfo .get hook: delegate loading of the transport's extra state to
 * the bus class.  Returns -1 when the bus class has no load_extra_state
 * hook, otherwise that hook's return value.  @size is unused.
 */
static int get_extra_state(QEMUFile *f, void *pv, size_t size)
{
    VirtIODevice *vdev = pv;
    BusState *bus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *bus_class = VIRTIO_BUS_GET_CLASS(bus);

    if (bus_class->load_extra_state) {
        return bus_class->load_extra_state(bus->parent, f);
    }

    return -1;
}

/*
 * VMStateInfo .put hook: delegate saving of the transport's extra state to
 * the bus class's save_extra_state hook, which is called unconditionally.
 * @size is unused.
 */
static void put_extra_state(QEMUFile *f, void *pv, size_t size)
{
    VirtIODevice *vdev = pv;
    BusState *bus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *bus_class = VIRTIO_BUS_GET_CLASS(bus);

    bus_class->save_extra_state(bus->parent, f);
}

/*
 * Custom VMStateInfo whose get/put hooks forward to the bus class's
 * load_extra_state/save_extra_state callbacks.
 */
static const VMStateInfo vmstate_info_extra_state = {
    .name = "virtqueue_extra_state",
    .get = get_extra_state,
    .put = put_extra_state,
};

/*
 * Subsection for transport-specific extra state; emitted only when
 * virtio_extra_state_needed().  Its single field is written out by hand
 * rather than through a VMSTATE_* macro so that it can use
 * vmstate_info_extra_state as its .info.
 */
static const VMStateDescription vmstate_virtio_extra_state = {
    .name = "virtio/extra_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_extra_state_needed,
    .fields = (VMStateField[]) {
        {
            .name         = "extra_state",
            .version_id   = 0,
            .field_exists = NULL,
            .size         = 0,
            .info         = &vmstate_info_extra_state,
            .flags        = VMS_SINGLE,
            /*
             * NOTE(review): presumably offset 0 makes the get/put hooks
             * receive the VirtIODevice itself, which is what they cast
             * their pv argument to -- confirm against the vmstate core.
             */
            .offset       = 0,
        },
        VMSTATE_END_OF_LIST()
    }
};

1372 1373 1374 1375
static const VMStateDescription vmstate_virtio_device_endian = {
    .name = "virtio/device_endian",
    .version_id = 1,
    .minimum_version_id = 1,
1376
    .needed = &virtio_device_endian_needed,
1377 1378 1379 1380 1381 1382
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(device_endian, VirtIODevice),
        VMSTATE_END_OF_LIST()
    }
};

G
Gerd Hoffmann 已提交
1383 1384 1385 1386
static const VMStateDescription vmstate_virtio_64bit_features = {
    .name = "virtio/64bit_features",
    .version_id = 1,
    .minimum_version_id = 1,
1387
    .needed = &virtio_64bit_features_needed,
G
Gerd Hoffmann 已提交
1388 1389 1390 1391 1392 1393
    .fields = (VMStateField[]) {
        VMSTATE_UINT64(guest_features, VirtIODevice),
        VMSTATE_END_OF_LIST()
    }
};

1394 1395 1396 1397 1398 1399 1400
/*
 * Top-level description used by virtio_save()/virtio_load().  It has no
 * fields of its own and only acts as the container for the subsections
 * listed below.
 */
static const VMStateDescription vmstate_virtio = {
    .name                   = "virtio",
    .version_id             = 1,
    .minimum_version_id     = 1,
    .minimum_version_id_old = 1,
    .fields                 = (VMStateField[]) {
        VMSTATE_END_OF_LIST()
    },
    .subsections            = (const VMStateDescription*[]) {
        &vmstate_virtio_device_endian,
        &vmstate_virtio_64bit_features,
        &vmstate_virtio_virtqueues,
        &vmstate_virtio_ringsize,
        &vmstate_virtio_extra_state,
        NULL
    }
};

A
aliguori 已提交
1412 1413
/*
 * Write the state of @vdev to migration stream @f.
 *
 * Order on the wire: transport config (if the bus class saves any), status,
 * ISR, queue_sel, low 32 guest feature bits, config length and contents,
 * number of queues in use, then per queue its size, optional alignment,
 * descriptor address, last_avail_idx and transport queue state.  The device
 * class's own save hook and the vmstate_virtio subsections follow.
 */
void virtio_save(VirtIODevice *vdev, QEMUFile *f)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *bus_class = VIRTIO_BUS_GET_CLASS(qbus);
    VirtioDeviceClass *dev_class = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t features_low = (vdev->guest_features & 0xffffffff);
    int used_queues = 0;
    int idx;

    if (bus_class->save_config) {
        bus_class->save_config(qbus->parent, f);
    }

    qemu_put_8s(f, &vdev->status);
    qemu_put_8s(f, &vdev->isr);
    qemu_put_be16s(f, &vdev->queue_sel);
    qemu_put_be32s(f, &features_low);
    qemu_put_be32(f, vdev->config_len);
    qemu_put_buffer(f, vdev->config, vdev->config_len);

    /* Queues in use are the leading ones with a non-zero size. */
    while (used_queues < VIRTIO_QUEUE_MAX &&
           vdev->vq[used_queues].vring.num != 0) {
        used_queues++;
    }

    qemu_put_be32(f, used_queues);

    for (idx = 0; idx < VIRTIO_QUEUE_MAX; idx++) {
        VirtQueue *vq = &vdev->vq[idx];

        if (vq->vring.num == 0) {
            break;
        }

        qemu_put_be32(f, vq->vring.num);
        if (bus_class->has_variable_vring_alignment) {
            qemu_put_be32(f, vq->vring.align);
        }
        /* XXX virtio-1 devices */
        qemu_put_be64(f, vq->vring.desc);
        qemu_put_be16s(f, &vq->last_avail_idx);
        if (bus_class->save_queue) {
            bus_class->save_queue(qbus->parent, idx, f);
        }
    }

    if (dev_class->save != NULL) {
        dev_class->save(vdev, f);
    }

    /* Subsections */
    vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
}

1462
/*
 * Apply guest feature bits @val without looking at the device status.
 *
 * Bits the host does not offer are dropped; the remaining set is handed to
 * the device class's set_features hook (if any) and stored in
 * vdev->guest_features.  Returns -1 if any bit had to be dropped, else 0.
 */
static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
{
    VirtioDeviceClass *dev_class = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint64_t unsupported = val & ~(vdev->host_features);
    uint64_t accepted = val & vdev->host_features;

    if (dev_class->set_features) {
        dev_class->set_features(vdev, accepted);
    }
    vdev->guest_features = accepted;

    if (unsupported != 0) {
        return -1;
    }
    return 0;
}

1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486
/*
 * Set the guest feature bits to @val.
 *
 * Returns -EINVAL once the FEATURES_OK status bit is set, because the
 * driver must not attempt to set features after feature negotiation has
 * finished; otherwise returns what virtio_set_features_nocheck() returns.
 */
int virtio_set_features(VirtIODevice *vdev, uint64_t val)
{
    bool negotiation_done =
        (vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) != 0;

    if (!negotiation_done) {
        return virtio_set_features_nocheck(vdev, val);
    }
    return -EINVAL;
}

1487
/*
 * Restore the state of @vdev from migration stream @f, mirroring the layout
 * written by virtio_save().
 *
 * @version_id is passed through to the device class's load hook.
 *
 * Returns 0 on success.  A non-zero value is returned when a transport or
 * device hook fails (its return value is propagated), when the subsections
 * fail to load, or -1 when the stream contents are rejected: queue_sel out
 * of range, negative config length, too many queues, a queue without a
 * descriptor address but with a non-zero last_avail_idx, unsupported
 * feature bits, or an avail index inconsistent with last_avail_idx.
 */
int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
{
    int i, ret;
    int32_t config_len;
    uint32_t num;
    uint32_t features;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);

    /*
     * We poison the endianness to ensure it does not get used before
     * subsections have been loaded.
     */
    vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;

    if (k->load_config) {
        ret = k->load_config(qbus->parent, f);
        if (ret) {
            return ret;
        }
    }

    qemu_get_8s(f, &vdev->status);
    qemu_get_8s(f, &vdev->isr);
    qemu_get_be16s(f, &vdev->queue_sel);
    if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
        return -1;
    }
    qemu_get_be32s(f, &features);

    config_len = qemu_get_be32(f);

    /*
     * The length comes from the stream and is held in a signed variable.
     * A negative value must not reach the size computation and the skip
     * loop below, where it would be compared against vdev->config_len
     * after implicit conversion.
     */
    if (config_len < 0) {
        error_report("Invalid config length: %" PRId32, config_len);
        return -1;
    }

    /*
     * There are cases where the incoming config can be bigger or smaller
     * than what we have; so load what we have space for, and skip
     * any excess that's in the stream.
     */
    qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));

    while (config_len > vdev->config_len) {
        qemu_get_byte(f);
        config_len--;
    }

    num = qemu_get_be32(f);

    if (num > VIRTIO_QUEUE_MAX) {
        error_report("Invalid number of virtqueues: 0x%x", num);
        return -1;
    }

    for (i = 0; i < num; i++) {
        vdev->vq[i].vring.num = qemu_get_be32(f);
        if (k->has_variable_vring_alignment) {
            vdev->vq[i].vring.align = qemu_get_be32(f);
        }
        vdev->vq[i].vring.desc = qemu_get_be64(f);
        qemu_get_be16s(f, &vdev->vq[i].last_avail_idx);
        vdev->vq[i].signalled_used_valid = false;
        vdev->vq[i].notification = true;

        if (vdev->vq[i].vring.desc) {
            /* XXX virtio-1 devices */
            virtio_queue_update_rings(vdev, i);
        } else if (vdev->vq[i].last_avail_idx) {
            error_report("VQ %d address 0x0 "
                         "inconsistent with Host index 0x%x",
                         i, vdev->vq[i].last_avail_idx);
            return -1;
        }
        if (k->load_queue) {
            ret = k->load_queue(qbus->parent, i, f);
            if (ret) {
                return ret;
            }
        }
    }

    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);

    if (vdc->load != NULL) {
        ret = vdc->load(vdev, f, version_id);
        if (ret) {
            return ret;
        }
    }

    /* Subsections */
    ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
    if (ret) {
        return ret;
    }

    /* No endianness subsection in the stream: fall back to the default. */
    if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
        vdev->device_endian = virtio_default_endian();
    }

    if (virtio_64bit_features_needed(vdev)) {
        /*
         * Subsection load filled vdev->guest_features.  Run them
         * through virtio_set_features to sanity-check them against
         * host_features.
         */
        uint64_t features64 = vdev->guest_features;
        if (virtio_set_features_nocheck(vdev, features64) < 0) {
            error_report("Features 0x%" PRIx64 " unsupported. "
                         "Allowed features: 0x%" PRIx64,
                         features64, vdev->host_features);
            return -1;
        }
    } else {
        if (virtio_set_features_nocheck(vdev, features) < 0) {
            error_report("Features 0x%x unsupported. "
                         "Allowed features: 0x%" PRIx64,
                         features, vdev->host_features);
            return -1;
        }
    }

    for (i = 0; i < num; i++) {
        if (vdev->vq[i].vring.desc) {
            uint16_t nheads;
            nheads = vring_avail_idx(&vdev->vq[i]) - vdev->vq[i].last_avail_idx;
            /* Check it isn't doing strange things with descriptor numbers. */
            if (nheads > vdev->vq[i].vring.num) {
                error_report("VQ %d size 0x%x Guest index 0x%x "
                             "inconsistent with Host index 0x%x: delta 0x%x",
                             i, vdev->vq[i].vring.num,
                             vring_avail_idx(&vdev->vq[i]),
                             vdev->vq[i].last_avail_idx, nheads);
                return -1;
            }
            vdev->vq[i].used_idx = vring_used_idx(&vdev->vq[i]);
            vdev->vq[i].shadow_avail_idx = vring_avail_idx(&vdev->vq[i]);
        }
    }

    return 0;
}

1626
/*
 * Undo virtio_init(): remove the VM change state handler and free the
 * config buffer, the queue array and the per-vector queue lists.
 */
void virtio_cleanup(VirtIODevice *vdev)
{
    qemu_del_vm_change_state_handler(vdev->vmstate);

    g_free(vdev->vector_queues);
    g_free(vdev->vq);
    g_free(vdev->config);
}

1634
/*
 * VM change state handler registered by virtio_init().
 *
 * The backend is considered running when the VM runs and the DRIVER_OK
 * status bit is set.  virtio_set_status() is re-issued with the current
 * status before the transport's vmstate_change hook when the backend
 * starts, and after it when the backend stops.
 */
static void virtio_vmstate_change(void *opaque, int running, RunState state)
{
    VirtIODevice *vdev = opaque;
    BusState *bus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *bus_class = VIRTIO_BUS_GET_CLASS(bus);
    bool driver_ok = (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) != 0;
    bool backend_run = running && driver_ok;

    vdev->vm_running = running;

    if (backend_run) {
        virtio_set_status(vdev, vdev->status);
    }

    if (bus_class->vmstate_change) {
        bus_class->vmstate_change(bus->parent, backend_run);
    }

    if (!backend_run) {
        virtio_set_status(vdev, vdev->status);
    }
}

1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665
/*
 * Initialise the object at @data as a device of type @vdev_name and size
 * @vdev_size, attach it to @proxy_obj as the child property
 * "virtio-backend", drop the local reference and alias all of its
 * properties onto @proxy_obj.
 */
void virtio_instance_init_common(Object *proxy_obj, void *data,
                                 size_t vdev_size, const char *vdev_name)
{
    DeviceState *backend = data;

    object_initialize(backend, vdev_size, vdev_name);

    object_property_add_child(proxy_obj, "virtio-backend",
                              OBJECT(backend), NULL);
    object_unref(OBJECT(backend));

    qdev_alias_all_properties(backend, proxy_obj);
}

1666 1667
/*
 * Bring @vdev into its initial state.
 *
 * Allocates the per-vector queue lists when the transport reports a
 * non-zero vector count, the array of VIRTIO_QUEUE_MAX queues and, for a
 * non-zero @config_size, a zeroed config buffer.  Also registers
 * virtio_vmstate_change() as VM change state handler.
 */
void virtio_init(VirtIODevice *vdev, const char *name,
                 uint16_t device_id, size_t config_size)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *bus_class = VIRTIO_BUS_GET_CLASS(qbus);
    int nvectors = 0;
    int idx;

    if (bus_class->query_nvectors) {
        nvectors = bus_class->query_nvectors(qbus->parent);
    }

    if (nvectors) {
        vdev->vector_queues =
            g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
    }

    vdev->device_id = device_id;
    vdev->status = 0;
    vdev->isr = 0;
    vdev->queue_sel = 0;
    vdev->config_vector = VIRTIO_NO_VECTOR;
    vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
    vdev->vm_running = runstate_is_running();

    /* Every queue starts without a vector and knows its owner and index. */
    for (idx = 0; idx < VIRTIO_QUEUE_MAX; idx++) {
        VirtQueue *vq = &vdev->vq[idx];

        vq->vector = VIRTIO_NO_VECTOR;
        vq->vdev = vdev;
        vq->queue_index = idx;
    }

    vdev->name = name;
    vdev->config_len = config_size;
    vdev->config = vdev->config_len ? g_malloc0(config_size) : NULL;

    vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
                                                     vdev);
    vdev->device_endian = virtio_default_endian();
    vdev->use_guest_notifier_mask = true;
}
A
aliguori 已提交
1704

A
Avi Kivity 已提交
1705
/* Return the descriptor table address of queue @n. */
hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
{
    const VRing *ring = &vdev->vq[n].vring;

    return ring->desc;
}

A
Avi Kivity 已提交
1710
/* Return the avail ring address of queue @n. */
hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
{
    const VRing *ring = &vdev->vq[n].vring;

    return ring->avail;
}

A
Avi Kivity 已提交
1715
/* Return the used ring address of queue @n. */
hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
{
    const VRing *ring = &vdev->vq[n].vring;

    return ring->used;
}

A
Avi Kivity 已提交
1720
/*
 * Return the start address of the whole ring of queue @n, which is the
 * descriptor table address.
 */
hwaddr virtio_queue_get_ring_addr(VirtIODevice *vdev, int n)
{
    const VRing *ring = &vdev->vq[n].vring;

    return ring->desc;
}

A
Avi Kivity 已提交
1725
/* Return the size in bytes of the descriptor table of queue @n. */
hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
{
    const VRing *ring = &vdev->vq[n].vring;

    return sizeof(VRingDesc) * ring->num;
}

A
Avi Kivity 已提交
1730
/*
 * Return the size in bytes of the avail ring of queue @n: the VRingAvail
 * header followed by one uint16_t per queue entry.
 */
hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
{
    const VRing *ring = &vdev->vq[n].vring;
    size_t entries_bytes = sizeof(uint16_t) * ring->num;

    return offsetof(VRingAvail, ring) + entries_bytes;
}

A
Avi Kivity 已提交
1736
/*
 * Return the size in bytes of the used ring of queue @n: the VRingUsed
 * header followed by one VRingUsedElem per queue entry.
 */
hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
{
    const VRing *ring = &vdev->vq[n].vring;
    size_t entries_bytes = sizeof(VRingUsedElem) * ring->num;

    return offsetof(VRingUsed, ring) + entries_bytes;
}

A
Avi Kivity 已提交
1742
/*
 * Return the number of bytes from the descriptor table address of queue @n
 * up to the end of its used ring.
 */
hwaddr virtio_queue_get_ring_size(VirtIODevice *vdev, int n)
{
    const VRing *ring = &vdev->vq[n].vring;
    hwaddr used_offset = ring->used - ring->desc;

    return used_offset + virtio_queue_get_used_size(vdev, n);
}

/* Return last_avail_idx, the next head to pop, of queue @n. */
uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
{
    const VirtQueue *queue = &vdev->vq[n];

    return queue->last_avail_idx;
}

/*
 * Set last_avail_idx of queue @n to @idx and reset the shadow copy of the
 * avail index to the same value.
 */
void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
{
    VirtQueue *queue = &vdev->vq[n];

    queue->last_avail_idx = idx;
    queue->shadow_avail_idx = idx;
}

1759 1760 1761 1762 1763
/* Mark the recorded signalled_used value of queue @n as no longer valid. */
void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
{
    VirtQueue *queue = &vdev->vq[n];

    queue->signalled_used_valid = false;
}

1764 1765 1766 1767 1768
/* Return a pointer to queue number @n of @vdev; @n is not range-checked. */
VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
{
    return &vdev->vq[n];
}

1769 1770 1771 1772 1773
/* Return the index stored in @vq's queue_index field. */
uint16_t virtio_get_queue_index(VirtQueue *vq)
{
    uint16_t index = vq->queue_index;

    return index;
}

1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797
/*
 * Event handler for a queue's guest notifier: if the notifier was pending
 * (and is now cleared), raise the queue interrupt.
 */
static void virtio_queue_guest_notifier_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, guest_notifier);

    if (!event_notifier_test_and_clear(n)) {
        return;
    }
    virtio_irq(vq);
}

/*
 * Install or remove the handler of @vq's guest notifier.
 *
 * The handler is installed only when @assign is set and @with_irqfd is
 * not; in every other case it is removed.  When un-assigning, the notifier
 * is tested and cleared once more by hand.
 */
void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
                                                bool with_irqfd)
{
    void (*handler)(EventNotifier *) = NULL;

    if (assign && !with_irqfd) {
        handler = virtio_queue_guest_notifier_read;
    }
    event_notifier_set_handler(&vq->guest_notifier, handler);

    if (!assign) {
        /* Test and clear notifier before closing it,
         * in case poll callback didn't have time to run. */
        virtio_queue_guest_notifier_read(&vq->guest_notifier);
    }
}

1798 1799 1800 1801
/* Return the address of the guest notifier embedded in @vq. */
EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
{
    EventNotifier *notifier = &vq->guest_notifier;

    return notifier;
}
1802

M
Michael S. Tsirkin 已提交
1803
/*
 * AioContext handler for a queue's host notifier: if the notifier was
 * pending (and is now cleared), run the queue's AIO output handler.
 */
static void virtio_queue_host_notifier_aio_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);

    if (!event_notifier_test_and_clear(n)) {
        return;
    }
    virtio_queue_notify_aio_vq(vq);
}

1811 1812 1813 1814 1815
/*
 * Install or remove the handler of @vq's host notifier in AioContext @ctx.
 *
 * The handler is installed only when both @assign and @set_handler are
 * set; otherwise it is removed.  When un-assigning, the notifier is tested
 * and cleared once more by hand.
 */
void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
                                                bool assign, bool set_handler)
{
    void (*handler)(EventNotifier *) = NULL;

    if (assign && set_handler) {
        handler = virtio_queue_host_notifier_aio_read;
    }
    aio_set_event_notifier(ctx, &vq->host_notifier, true, handler);

    if (!assign) {
        /* Test and clear the notifier after disabling the event,
         * in case poll callback didn't have time to run. */
        virtio_queue_host_notifier_aio_read(&vq->host_notifier);
    }
}

/*
 * Host notifier handler used with the plain fd handler (see
 * virtio_queue_set_host_notifier_fd_handler()).
 *
 * If the notifier had an event pending, the event is cleared and the
 * queue embedding the notifier is handed to virtio_queue_notify_vq().
 */
static void virtio_queue_host_notifier_read(EventNotifier *n)
{
    VirtQueue *vq = container_of(n, VirtQueue, host_notifier);
    if (event_notifier_test_and_clear(n)) {
        virtio_queue_notify_vq(vq);
    }
}

P
Paolo Bonzini 已提交
1835 1836
void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
                                               bool set_handler)
1837
{
P
Paolo Bonzini 已提交
1838
    if (assign && set_handler) {
1839 1840 1841 1842
        event_notifier_set_handler(&vq->host_notifier,
                                   virtio_queue_host_notifier_read);
    } else {
        event_notifier_set_handler(&vq->host_notifier, NULL);
P
Paolo Bonzini 已提交
1843 1844
    }
    if (!assign) {
1845 1846 1847 1848 1849 1850
        /* Test and clear notifier before after disabling event,
         * in case poll callback didn't have time to run. */
        virtio_queue_host_notifier_read(&vq->host_notifier);
    }
}

1851 1852 1853 1854
EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
{
    return &vq->host_notifier;
}
1855

1856 1857
void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
{
1858
    g_free(vdev->bus_name);
1859
    vdev->bus_name = g_strdup(bus_name);
1860 1861
}

1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873
static void virtio_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
    Error *err = NULL;

    if (vdc->realize != NULL) {
        vdc->realize(dev, &err);
        if (err != NULL) {
            error_propagate(errp, err);
            return;
        }
1874
    }
J
Jason Wang 已提交
1875 1876 1877 1878 1879 1880

    virtio_bus_device_plugged(vdev, &err);
    if (err != NULL) {
        error_propagate(errp, err);
        return;
    }
1881 1882
}

1883
static void virtio_device_unrealize(DeviceState *dev, Error **errp)
1884
{
1885
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
1886 1887
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(dev);
    Error *err = NULL;
1888

1889 1890
    virtio_bus_device_unplugged(vdev);

1891 1892 1893 1894 1895 1896
    if (vdc->unrealize != NULL) {
        vdc->unrealize(dev, &err);
        if (err != NULL) {
            error_propagate(errp, err);
            return;
        }
1897
    }
1898

1899 1900
    g_free(vdev->bus_name);
    vdev->bus_name = NULL;
1901 1902
}

C
Cornelia Huck 已提交
1903 1904 1905 1906 1907
static Property virtio_properties[] = {
    DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
    DEFINE_PROP_END_OF_LIST(),
};

1908 1909 1910 1911
static void virtio_device_class_init(ObjectClass *klass, void *data)
{
    /* Set the default value here. */
    DeviceClass *dc = DEVICE_CLASS(klass);
1912 1913 1914

    dc->realize = virtio_device_realize;
    dc->unrealize = virtio_device_unrealize;
1915
    dc->bus_type = TYPE_VIRTIO_BUS;
C
Cornelia Huck 已提交
1916
    dc->props = virtio_properties;
1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933
}

/* Type description of the abstract base type TYPE_VIRTIO_DEVICE. */
static const TypeInfo virtio_device_info = {
    /* identity */
    .name          = TYPE_VIRTIO_DEVICE,
    .parent        = TYPE_DEVICE,
    .abstract      = true,

    /* instance and class layout */
    .instance_size = sizeof(VirtIODevice),
    .class_size    = sizeof(VirtioDeviceClass),
    .class_init    = virtio_device_class_init,
};

static void virtio_register_types(void)
{
    type_register_static(&virtio_device_info);
}

type_init(virtio_register_types)