You need to sign in or sign up before continuing.
virtio.c 54.8 KB
Newer Older
A
aliguori 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
/*
 * Virtio Support
 *
 * Copyright IBM, Corp. 2007
 *
 * Authors:
 *  Anthony Liguori   <aliguori@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 */

P
Peter Maydell 已提交
14
#include "qemu/osdep.h"
15
#include "qapi/error.h"
16 17
#include "qemu-common.h"
#include "cpu.h"
18
#include "trace.h"
19
#include "exec/address-spaces.h"
20
#include "qemu/error-report.h"
P
Paolo Bonzini 已提交
21
#include "hw/virtio/virtio.h"
22
#include "qemu/atomic.h"
P
Paolo Bonzini 已提交
23
#include "hw/virtio/virtio-bus.h"
24
#include "migration/migration.h"
25
#include "hw/virtio/virtio-access.h"
A
aliguori 已提交
26

27 28 29 30 31
/*
 * The alignment to use between consumer and producer parts of vring.
 * x86 pagesize again. This is the default, used by transports like PCI
 * which don't provide a means for the guest to tell the host the alignment.
 */
32 33
#define VIRTIO_PCI_VRING_ALIGN         4096

A
aliguori 已提交
34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
/*
 * One entry of a descriptor table, as fetched from guest memory by
 * vring_desc_read().
 */
typedef struct VRingDesc
{
    uint64_t addr;   /* guest physical address of the buffer */
    uint32_t len;    /* buffer length in bytes */
    uint16_t flags;  /* tested against the VRING_DESC_F_* bits */
    uint16_t next;   /* index of the chained descriptor when F_NEXT is set */
} VRingDesc;

/*
 * Header of the available ring.  The structure is only used to compute
 * field offsets (via offsetof) into guest memory; @ring is a C99 flexible
 * array member so that indexing past the header is well defined.
 */
typedef struct VRingAvail
{
    uint16_t flags;
    uint16_t idx;     /* index read by vring_avail_idx() */
    uint16_t ring[];  /* descriptor head numbers */
} VRingAvail;

/* One entry of the used ring, as written by vring_used_write(). */
typedef struct VRingUsedElem
{
    uint32_t id;   /* head index of the completed element */
    uint32_t len;  /* length reported for the completed element */
} VRingUsedElem;

/*
 * Header of the used ring.  Like VRingAvail it is only used for offset
 * computations; @ring is a C99 flexible array member.
 */
typedef struct VRingUsed
{
    uint16_t flags;
    uint16_t idx;
    VRingUsedElem ring[];
} VRingUsed;

typedef struct VRing
{
    unsigned int num;
C
Cornelia Huck 已提交
65
    unsigned int num_default;
66
    unsigned int align;
A
Avi Kivity 已提交
67 68 69
    hwaddr desc;
    hwaddr avail;
    hwaddr used;
A
aliguori 已提交
70 71 72 73 74
} VRing;

struct VirtQueue
{
    VRing vring;
75 76

    /* Next head to pop */
A
aliguori 已提交
77
    uint16_t last_avail_idx;
78

79 80 81
    /* Last avail_idx read from VQ. */
    uint16_t shadow_avail_idx;

82 83
    uint16_t used_idx;

M
Michael S. Tsirkin 已提交
84 85 86 87 88 89 90 91 92
    /* Last used index value we have signalled on */
    uint16_t signalled_used;

    /* Last used index value we have signalled on */
    bool signalled_used_valid;

    /* Notification enabled? */
    bool notification;

93 94
    uint16_t queue_index;

A
aliguori 已提交
95
    int inuse;
M
Michael S. Tsirkin 已提交
96

97
    uint16_t vector;
98 99
    VirtIOHandleOutput handle_output;
    VirtIOHandleOutput handle_aio_output;
100
    bool use_aio;
101 102 103
    VirtIODevice *vdev;
    EventNotifier guest_notifier;
    EventNotifier host_notifier;
104
    QLIST_ENTRY(VirtQueue) node;
A
aliguori 已提交
105 106 107
};

/* virt queue functions */
108
/*
 * Recompute the avail and used ring addresses of queue @n from its
 * descriptor table address, ring size and alignment.  Does nothing while
 * the descriptor table address is still zero.
 */
void virtio_queue_update_rings(VirtIODevice *vdev, int n)
{
    VRing *vring = &vdev->vq[n].vring;

    if (!vring->desc) {
        /* not yet setup -> nothing to do */
        return;
    }
    /* The avail ring starts right after the descriptor table. */
    vring->avail = vring->desc + vring->num * sizeof(VRingDesc);
    /* The used ring follows the avail ring entries, rounded up to align. */
    vring->used = vring_align(vring->avail +
                              offsetof(VRingAvail, ring[vring->num]),
                              vring->align);
}

122 123
/*
 * Copy descriptor number @i of the table at guest physical address
 * @desc_pa into @desc, then pass every field through the matching
 * virtio_tswap*() helper for @vdev.
 */
static void vring_desc_read(VirtIODevice *vdev, VRingDesc *desc,
                            hwaddr desc_pa, int i)
{
    address_space_read(&address_space_memory, desc_pa + i * sizeof(VRingDesc),
                       MEMTXATTRS_UNSPECIFIED, (void *)desc, sizeof(VRingDesc));
    virtio_tswap64s(vdev, &desc->addr);
    virtio_tswap32s(vdev, &desc->len);
    virtio_tswap16s(vdev, &desc->flags);
    virtio_tswap16s(vdev, &desc->next);
}

/* Load the flags field of the available ring from guest memory. */
static inline uint16_t vring_avail_flags(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, flags);
    return virtio_lduw_phys(vq->vdev, pa);
}

/*
 * Load the idx field of the available ring from guest memory.  The value
 * is also cached in vq->shadow_avail_idx before being returned.
 */
static inline uint16_t vring_avail_idx(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, idx);
    vq->shadow_avail_idx = virtio_lduw_phys(vq->vdev, pa);
    return vq->shadow_avail_idx;
}

/* Load entry @i of the available ring from guest memory. */
static inline uint16_t vring_avail_ring(VirtQueue *vq, int i)
{
    hwaddr pa;
    pa = vq->vring.avail + offsetof(VRingAvail, ring[i]);
    return virtio_lduw_phys(vq->vdev, pa);
}

155
/*
 * Read the 16-bit word located immediately after the last available ring
 * entry (slot index vring.num).
 */
static inline uint16_t vring_get_used_event(VirtQueue *vq)
{
    return vring_avail_ring(vq, vq->vring.num);
}

160 161
/*
 * Store @uelem into slot @i of the used ring in guest memory.
 * Note that @uelem is converted in place by virtio_tswap32s() before it is
 * written, so the caller's copy is modified.
 */
static inline void vring_used_write(VirtQueue *vq, VRingUsedElem *uelem,
                                    int i)
{
    hwaddr pa;
    virtio_tswap32s(vq->vdev, &uelem->id);
    virtio_tswap32s(vq->vdev, &uelem->len);
    pa = vq->vring.used + offsetof(VRingUsed, ring[i]);
    address_space_write(&address_space_memory, pa, MEMTXATTRS_UNSPECIFIED,
                       (void *)uelem, sizeof(VRingUsedElem));
}

/* Load the idx field of the used ring from guest memory. */
static uint16_t vring_used_idx(VirtQueue *vq)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    return virtio_lduw_phys(vq->vdev, pa);
}

M
Michael S. Tsirkin 已提交
178
/*
 * Store @val as the used ring idx in guest memory and remember it in
 * vq->used_idx.
 */
static inline void vring_used_idx_set(VirtQueue *vq, uint16_t val)
{
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, idx);
    virtio_stw_phys(vq->vdev, pa, val);
    vq->used_idx = val;
}

/* Read-modify-write the used ring flags in guest memory, OR-ing in @mask. */
static inline void vring_used_flags_set_bit(VirtQueue *vq, int mask)
{
    VirtIODevice *vdev = vq->vdev;
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) | mask);
}

/* Read-modify-write the used ring flags in guest memory, clearing @mask. */
static inline void vring_used_flags_unset_bit(VirtQueue *vq, int mask)
{
    VirtIODevice *vdev = vq->vdev;
    hwaddr pa;
    pa = vq->vring.used + offsetof(VRingUsed, flags);
    virtio_stw_phys(vdev, pa, virtio_lduw_phys(vdev, pa) & ~mask);
}

202
/*
 * Store @val in the 16-bit word located immediately after the last used
 * ring entry (slot index vring.num).  Skipped entirely while notifications
 * are disabled for this queue.
 */
static inline void vring_set_avail_event(VirtQueue *vq, uint16_t val)
{
    hwaddr pa;
    if (!vq->notification) {
        return;
    }
    pa = vq->vring.used + offsetof(VRingUsed, ring[vq->vring.num]);
    virtio_stw_phys(vq->vdev, pa, val);
}

A
aliguori 已提交
212 213
/*
 * Enable (@enable != 0) or disable guest->host notifications for @vq.
 *
 * With VIRTIO_RING_F_EVENT_IDX negotiated, the avail event word is
 * refreshed with the current avail index; otherwise the
 * VRING_USED_F_NO_NOTIFY bit in the used ring flags is cleared or set.
 */
void virtio_queue_set_notification(VirtQueue *vq, int enable)
{
    vq->notification = enable;
    if (virtio_vdev_has_feature(vq->vdev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vring_avail_idx(vq));
    } else if (enable) {
        vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
    } else {
        vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
    }
    if (enable) {
        /* Expose avail event/used flags before caller checks the avail idx. */
        smp_mb();
    }
}

/* Return non-zero once the queue's avail ring address is non-zero. */
int virtio_queue_ready(VirtQueue *vq)
{
    const hwaddr avail_ring = vq->vring.avail;

    return avail_ring != 0;
}

233 234
/* Fetch avail_idx from VQ memory only when we really need to know if
 * guest has added some buffers. */
A
aliguori 已提交
235 236
/*
 * Return non-zero when no buffers are pending on @vq.  The cached avail
 * index is consulted first; guest memory is only read when the cache says
 * the queue looks empty.
 */
int virtio_queue_empty(VirtQueue *vq)
{
    if (vq->shadow_avail_idx != vq->last_avail_idx) {
        return 0;
    }

    return vring_avail_idx(vq) == vq->last_avail_idx;
}

244 245
/*
 * Unmap every scatter/gather entry of @elem.
 *
 * For the in_sg entries, @len is spread over the entries in order: each one
 * is unmapped with is_write = 1 and an access length of at most what is
 * left of @len.  The out_sg entries are unmapped with is_write = 0 and
 * their full length.
 */
static void virtqueue_unmap_sg(VirtQueue *vq, const VirtQueueElement *elem,
                               unsigned int len)
{
    unsigned int offset;
    int i;

    offset = 0;
    for (i = 0; i < elem->in_num; i++) {
        size_t size = MIN(len - offset, elem->in_sg[i].iov_len);

        cpu_physical_memory_unmap(elem->in_sg[i].iov_base,
                                  elem->in_sg[i].iov_len,
                                  1, size);

        offset += size;
    }

    for (i = 0; i < elem->out_num; i++) {
        cpu_physical_memory_unmap(elem->out_sg[i].iov_base,
                                  elem->out_sg[i].iov_len,
                                  0, elem->out_sg[i].iov_len);
    }
}

J
Jason Wang 已提交
267 268 269 270
/*
 * Undo the pop of @elem: step last_avail_idx and inuse back by one and
 * unmap the element's buffers (@len is forwarded to virtqueue_unmap_sg()).
 */
void virtqueue_discard(VirtQueue *vq, const VirtQueueElement *elem,
                       unsigned int len)
{
    vq->last_avail_idx--;
    vq->inuse--;
    virtqueue_unmap_sg(vq, elem, len);
}

S
Stefan Hajnoczi 已提交
275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296
/* virtqueue_rewind:
 * @vq: The #VirtQueue
 * @num: How many popped elements to hand back
 *
 * Act as though the last @num elements had never been popped, so that the
 * next virtqueue_pop() fetches the oldest of them again.
 *
 * When a VirtQueueElement is at hand, virtqueue_discard() is the function
 * to use instead.
 *
 * Returns: false (and changes nothing) if @num exceeds the number of in use
 * elements, true otherwise.
 */
bool virtqueue_rewind(VirtQueue *vq, unsigned int num)
{
    bool ok = !(num > vq->inuse);

    if (ok) {
        vq->last_avail_idx -= num;
        vq->inuse -= num;
    }
    return ok;
}

297 298 299
/*
 * Unmap @elem and write a used ring entry for it.
 *
 * @len: length recorded in the used entry (also forwarded to the unmap)
 * @idx: offset from the current used index; the slot written is
 *       (idx + used_idx) modulo the ring size
 *
 * The used index itself is not advanced here; see virtqueue_flush().
 */
void virtqueue_fill(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len, unsigned int idx)
{
    VRingUsedElem uelem;

    trace_virtqueue_fill(vq, elem, len, idx);

    virtqueue_unmap_sg(vq, elem, len);

    idx = (idx + vq->used_idx) % vq->vring.num;

    uelem.id = elem->index;
    uelem.len = len;
    vring_used_write(vq, &uelem, idx);
}

/*
 * Publish @count previously filled used entries by advancing the used
 * index, and drop them from the in-use count.
 */
void virtqueue_flush(VirtQueue *vq, unsigned int count)
{
    uint16_t old, new;
    /* Make sure buffer is written before we update index. */
    smp_wmb();
    trace_virtqueue_flush(vq, count);
    old = vq->used_idx;
    new = old + count;
    vring_used_idx_set(vq, new);
    vq->inuse -= count;
    /* Stop trusting signalled_used when this 16-bit distance test trips. */
    if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old))) {
        vq->signalled_used_valid = false;
    }
}

/* Complete a single element: fill it at offset 0, then flush one entry. */
void virtqueue_push(VirtQueue *vq, const VirtQueueElement *elem,
                    unsigned int len)
{
    const unsigned int slot = 0;
    const unsigned int batch = 1;

    virtqueue_fill(vq, elem, len, slot);
    virtqueue_flush(vq, batch);
}

/*
 * Return how many heads are available starting at @idx, i.e. the 16-bit
 * difference between the avail index freshly read from guest memory and
 * @idx.  Terminates the process if that difference exceeds the ring size.
 */
static int virtqueue_num_heads(VirtQueue *vq, unsigned int idx)
{
    uint16_t num_heads = vring_avail_idx(vq) - idx;

    /* Check it isn't doing very strange things with descriptor numbers. */
    if (num_heads > vq->vring.num) {
        error_report("Guest moved used index from %u to %u",
                     idx, vq->shadow_avail_idx);
        exit(1);
    }
    /* On success, callers read a descriptor at vq->last_avail_idx.
     * Make sure descriptor read does not bypass avail index read. */
    if (num_heads) {
        smp_rmb();
    }

    return num_heads;
}
{
    unsigned int head;

    /* Grab the next descriptor number they're advertising, and increment
     * the index we've seen. */
    head = vring_avail_ring(vq, idx % vq->vring.num);

    /* If their number is silly, that's a fatal mistake. */
A
aliguori 已提交
362
    if (head >= vq->vring.num) {
363
        error_report("Guest says index %u is available", head);
A
aliguori 已提交
364 365
        exit(1);
    }
A
aliguori 已提交
366 367 368 369

    return head;
}

370 371
/*
 * Follow the chain from @desc to its successor.
 *
 * Returns @max when @desc does not have VRING_DESC_F_NEXT set.  Otherwise
 * the next descriptor is read into @desc from the table at @desc_pa and its
 * index is returned.  Terminates the process if the next index is not
 * below @max.
 */
static unsigned virtqueue_read_next_desc(VirtIODevice *vdev, VRingDesc *desc,
                                         hwaddr desc_pa, unsigned int max)
{
    unsigned int next;

    /* If this descriptor says it doesn't chain, we're done. */
    if (!(desc->flags & VRING_DESC_F_NEXT)) {
        return max;
    }

    /* Check they're not leading us off end of descriptors. */
    next = desc->next;
    /* Make sure compiler knows to grab that: we don't want it changing! */
    smp_wmb();

    if (next >= max) {
        error_report("Desc next is %u", next);
        exit(1);
    }

    vring_desc_read(vdev, desc, desc_pa, next);
    return next;
}

394
/*
 * Walk the pending descriptor chains of @vq without popping them and sum
 * the lengths of device-writable (VRING_DESC_F_WRITE) and device-readable
 * descriptors.
 *
 * @in_bytes:      if non-NULL, receives the writable byte total
 * @out_bytes:     if non-NULL, receives the readable byte total
 * @max_in_bytes,
 * @max_out_bytes: the walk stops early once both totals have reached these
 *                 thresholds
 *
 * Malformed rings (bad indirect table size, descriptor loops) terminate the
 * process.
 */
void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
                               unsigned int *out_bytes,
                               unsigned max_in_bytes, unsigned max_out_bytes)
{
    unsigned int idx;
    unsigned int total_bufs, in_total, out_total;

    idx = vq->last_avail_idx;

    total_bufs = in_total = out_total = 0;
    while (virtqueue_num_heads(vq, idx)) {
        VirtIODevice *vdev = vq->vdev;
        unsigned int max, num_bufs, indirect = 0;
        VRingDesc desc;
        hwaddr desc_pa;
        int i;

        max = vq->vring.num;
        num_bufs = total_bufs;
        i = virtqueue_get_head(vq, idx++);
        desc_pa = vq->vring.desc;
        vring_desc_read(vdev, &desc, desc_pa, i);

        if (desc.flags & VRING_DESC_F_INDIRECT) {
            if (desc.len % sizeof(VRingDesc)) {
                error_report("Invalid size for indirect buffer table");
                exit(1);
            }

            /* If we've got too many, that implies a descriptor loop. */
            if (num_bufs >= max) {
                error_report("Looped descriptor");
                exit(1);
            }

            /* loop over the indirect descriptor table */
            indirect = 1;
            max = desc.len / sizeof(VRingDesc);
            desc_pa = desc.addr;
            num_bufs = i = 0;
            vring_desc_read(vdev, &desc, desc_pa, i);
        }

        do {
            /* If we've got too many, that implies a descriptor loop. */
            if (++num_bufs > max) {
                error_report("Looped descriptor");
                exit(1);
            }

            if (desc.flags & VRING_DESC_F_WRITE) {
                in_total += desc.len;
            } else {
                out_total += desc.len;
            }
            if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
                goto done;
            }
        } while ((i = virtqueue_read_next_desc(vdev, &desc, desc_pa, max)) != max);

        /* An indirect table counts as a single buffer of the main ring. */
        if (!indirect) {
            total_bufs = num_bufs;
        } else {
            total_bufs++;
        }
    }
done:
    if (in_bytes) {
        *in_bytes = in_total;
    }
    if (out_bytes) {
        *out_bytes = out_total;
    }
}
A
aliguori 已提交
467

468 469 470 471 472
/*
 * Return non-zero when the pending descriptors of @vq provide at least
 * @in_bytes of writable and @out_bytes of readable buffer space.
 */
int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
                          unsigned int out_bytes)
{
    unsigned int in_total, out_total;

    virtqueue_get_avail_bytes(vq, &in_total, &out_total, in_bytes, out_bytes);
    return in_bytes <= in_total && out_bytes <= out_total;
}

477 478 479 480 481 482 483
/*
 * Map the guest buffer [@pa, @pa + @sz) and append the resulting pieces to
 * @iov / @addr, starting at slot *@p_num_sg.
 *
 * A single buffer may need several slots because each
 * cpu_physical_memory_map() call may map less than was asked for.
 * *@p_num_sg is updated to the new slot count on return.
 *
 * Terminates the process on a zero-sized buffer, when more than
 * @max_num_sg slots would be needed, or when mapping fails.
 */
static void virtqueue_map_desc(unsigned int *p_num_sg, hwaddr *addr, struct iovec *iov,
                               unsigned int max_num_sg, bool is_write,
                               hwaddr pa, size_t sz)
{
    unsigned num_sg = *p_num_sg;
    assert(num_sg <= max_num_sg);

    if (!sz) {
        error_report("virtio: zero sized buffers are not allowed");
        exit(1);
    }

    while (sz) {
        hwaddr len = sz;

        if (num_sg == max_num_sg) {
            error_report("virtio: too many write descriptors in indirect table");
            exit(1);
        }

        iov[num_sg].iov_base = cpu_physical_memory_map(pa, &len, is_write);
        if (!iov[num_sg].iov_base) {
            error_report("virtio: bogus descriptor or out of resources");
            exit(1);
        }

        iov[num_sg].iov_len = len;
        addr[num_sg] = pa;

        sz -= len;
        pa += len;
        num_sg++;
    }
    *p_num_sg = num_sg;
}

M
Michael S. Tsirkin 已提交
513 514 515
/*
 * (Re)map the first *@num_sg entries of @sg from the guest addresses in
 * @addr, storing each mapped pointer in sg[i].iov_base.
 *
 * Asserts that *@num_sg does not exceed @max_size, and terminates the
 * process if a mapping fails or comes back shorter than sg[i].iov_len.
 */
static void virtqueue_map_iovec(struct iovec *sg, hwaddr *addr,
                                unsigned int *num_sg, unsigned int max_size,
                                int is_write)
{
    unsigned int i;
    hwaddr len;

    /* Note: this function MUST validate input, some callers
     * are passing in num_sg values received over the network.
     */
    /* TODO: teach all callers that this can fail, and return failure instead
     * of asserting here.
     * When we do, we might be able to re-enable NDEBUG below.
     */
#ifdef NDEBUG
#error building with NDEBUG is not supported
#endif
    assert(*num_sg <= max_size);

    for (i = 0; i < *num_sg; i++) {
        len = sg[i].iov_len;
        sg[i].iov_base = cpu_physical_memory_map(addr[i], &len, is_write);
        if (!sg[i].iov_base) {
            error_report("virtio: error trying to map MMIO memory");
            exit(1);
        }
        if (len != sg[i].iov_len) {
            error_report("virtio: unexpected memory split");
            exit(1);
        }
    }
}

M
Michael S. Tsirkin 已提交
546 547 548
/*
 * Map both scatter/gather lists of @elem from their recorded guest
 * addresses: in_sg with is_write = 1, out_sg with is_write = 0.
 */
void virtqueue_map(VirtQueueElement *elem)
{
    virtqueue_map_iovec(elem->in_sg, elem->in_addr, &elem->in_num,
                        VIRTQUEUE_MAX_SIZE, 1);
    virtqueue_map_iovec(elem->out_sg, elem->out_addr, &elem->out_num,
                        VIRTQUEUE_MAX_SIZE, 0);
}

/*
 * Allocate one block holding a caller structure of @sz bytes (which must be
 * at least sizeof(VirtQueueElement)) followed by the in_addr, out_addr,
 * in_sg and out_sg arrays sized for @in_num / @out_num entries.
 *
 * The element's counts and array pointers are initialised; everything else
 * is left as returned by g_malloc().  The whole element is one allocation.
 */
void *virtqueue_alloc_element(size_t sz, unsigned out_num, unsigned in_num)
{
    VirtQueueElement *elem;
    /* Offsets of the trailing arrays, each aligned for its element type. */
    size_t in_addr_ofs = QEMU_ALIGN_UP(sz, __alignof__(elem->in_addr[0]));
    size_t out_addr_ofs = in_addr_ofs + in_num * sizeof(elem->in_addr[0]);
    size_t out_addr_end = out_addr_ofs + out_num * sizeof(elem->out_addr[0]);
    size_t in_sg_ofs = QEMU_ALIGN_UP(out_addr_end, __alignof__(elem->in_sg[0]));
    size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
    size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);

    assert(sz >= sizeof(VirtQueueElement));
    elem = g_malloc(out_sg_end);
    elem->out_num = out_num;
    elem->in_num = in_num;
    elem->in_addr = (void *)elem + in_addr_ofs;
    elem->out_addr = (void *)elem + out_addr_ofs;
    elem->in_sg = (void *)elem + in_sg_ofs;
    elem->out_sg = (void *)elem + out_sg_ofs;
    return elem;
}

575
/*
 * Pop the next available element from @vq.
 *
 * @sz: size of the caller's element structure, forwarded to
 *      virtqueue_alloc_element()
 *
 * Returns NULL when the queue is empty.  Otherwise the descriptor chain
 * (direct or indirect) is walked, every buffer is mapped, and a newly
 * allocated element is returned with the readable buffers in out_sg and the
 * writable ones in in_sg.  Malformed rings terminate the process.
 */
void *virtqueue_pop(VirtQueue *vq, size_t sz)
{
    unsigned int i, head, max;
    hwaddr desc_pa = vq->vring.desc;
    VirtIODevice *vdev = vq->vdev;
    VirtQueueElement *elem;
    unsigned out_num, in_num;
    hwaddr addr[VIRTQUEUE_MAX_SIZE];
    struct iovec iov[VIRTQUEUE_MAX_SIZE];
    VRingDesc desc;

    if (virtio_queue_empty(vq)) {
        return NULL;
    }
    /* Needed after virtio_queue_empty(), see comment in
     * virtqueue_num_heads(). */
    smp_rmb();

    /* When we start there are none of either input nor output. */
    out_num = in_num = 0;

    max = vq->vring.num;

    if (vq->inuse >= vq->vring.num) {
        error_report("Virtqueue size exceeded");
        exit(1);
    }

    i = head = virtqueue_get_head(vq, vq->last_avail_idx++);
    if (virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        vring_set_avail_event(vq, vq->last_avail_idx);
    }

    vring_desc_read(vdev, &desc, desc_pa, i);
    if (desc.flags & VRING_DESC_F_INDIRECT) {
        if (desc.len % sizeof(VRingDesc)) {
            error_report("Invalid size for indirect buffer table");
            exit(1);
        }

        /* loop over the indirect descriptor table */
        max = desc.len / sizeof(VRingDesc);
        desc_pa = desc.addr;
        i = 0;
        vring_desc_read(vdev, &desc, desc_pa, i);
    }

    /* Collect all the descriptors */
    do {
        if (desc.flags & VRING_DESC_F_WRITE) {
            /* Writable buffers are stored after the readable ones. */
            virtqueue_map_desc(&in_num, addr + out_num, iov + out_num,
                               VIRTQUEUE_MAX_SIZE - out_num, true, desc.addr, desc.len);
        } else {
            if (in_num) {
                error_report("Incorrect order for descriptors");
                exit(1);
            }
            virtqueue_map_desc(&out_num, addr, iov,
                               VIRTQUEUE_MAX_SIZE, false, desc.addr, desc.len);
        }

        /* If we've got too many, that implies a descriptor loop. */
        if ((in_num + out_num) > max) {
            error_report("Looped descriptor");
            exit(1);
        }
    } while ((i = virtqueue_read_next_desc(vdev, &desc, desc_pa, max)) != max);

    /* Now copy what we have collected and mapped */
    elem = virtqueue_alloc_element(sz, out_num, in_num);
    elem->index = head;
    for (i = 0; i < out_num; i++) {
        elem->out_addr[i] = addr[i];
        elem->out_sg[i] = iov[i];
    }
    for (i = 0; i < in_num; i++) {
        elem->in_addr[i] = addr[out_num + i];
        elem->in_sg[i] = iov[out_num + i];
    }

    vq->inuse++;

    trace_virtqueue_pop(vq, elem, elem->in_num, elem->out_num);
    return elem;
}

661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677
/* Reading and writing a structure directly to QEMUFile is *awful*, but
 * it is what QEMU has always done by mistake.  We can change it sooner
 * or later by bumping the version number of the affected vm states.
 * In the meanwhile, since the in-memory layout of VirtQueueElement
 * has changed, we need to marshal to and from the layout that was
 * used before the change.
 *
 * This structure is copied to and from the QEMUFile as raw bytes by
 * qemu_put_virtqueue_element() / qemu_get_virtqueue_element().
 */
typedef struct VirtQueueElementOld {
    unsigned int index;                       /* element head index */
    unsigned int out_num;                     /* used entries in out_* */
    unsigned int in_num;                      /* used entries in in_* */
    hwaddr in_addr[VIRTQUEUE_MAX_SIZE];
    hwaddr out_addr[VIRTQUEUE_MAX_SIZE];
    struct iovec in_sg[VIRTQUEUE_MAX_SIZE];   /* only iov_len is saved */
    struct iovec out_sg[VIRTQUEUE_MAX_SIZE];  /* only iov_len is saved */
} VirtQueueElementOld;

678 679
/*
 * Load one element from migration stream @f.
 *
 * @sz: size of the caller's element structure, forwarded to
 *      virtqueue_alloc_element()
 *
 * The element is read in the VirtQueueElementOld layout, converted to a
 * freshly allocated VirtQueueElement, and its buffers are mapped with
 * virtqueue_map() before it is returned.
 */
void *qemu_get_virtqueue_element(QEMUFile *f, size_t sz)
{
    VirtQueueElement *elem;
    VirtQueueElementOld data;
    int i;

    qemu_get_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));

    /* The counts come straight from the stream: reject values that would
     * make the copy loops below index past the fixed-size arrays in data.
     * assert() is always active here, since this file refuses to build
     * with NDEBUG.
     */
    assert(data.in_num <= sizeof(data.in_addr) / sizeof(data.in_addr[0]));
    assert(data.out_num <= sizeof(data.out_addr) / sizeof(data.out_addr[0]));

    elem = virtqueue_alloc_element(sz, data.out_num, data.in_num);
    elem->index = data.index;

    for (i = 0; i < elem->in_num; i++) {
        elem->in_addr[i] = data.in_addr[i];
    }

    for (i = 0; i < elem->out_num; i++) {
        elem->out_addr[i] = data.out_addr[i];
    }

    for (i = 0; i < elem->in_num; i++) {
        /* Base is overwritten by virtqueue_map.  */
        elem->in_sg[i].iov_base = 0;
        elem->in_sg[i].iov_len = data.in_sg[i].iov_len;
    }

    for (i = 0; i < elem->out_num; i++) {
        /* Base is overwritten by virtqueue_map.  */
        elem->out_sg[i].iov_base = 0;
        elem->out_sg[i].iov_len = data.out_sg[i].iov_len;
    }

    virtqueue_map(elem);
    return elem;
}

/*
 * Save @elem to migration stream @f in the VirtQueueElementOld layout.
 * The staging structure is zeroed first, so unused slots and every
 * iov_base are written as zero.
 */
void qemu_put_virtqueue_element(QEMUFile *f, VirtQueueElement *elem)
{
    VirtQueueElementOld data;
    int i;

    memset(&data, 0, sizeof(data));
    data.index = elem->index;
    data.in_num = elem->in_num;
    data.out_num = elem->out_num;

    for (i = 0; i < elem->in_num; i++) {
        data.in_addr[i] = elem->in_addr[i];
    }

    for (i = 0; i < elem->out_num; i++) {
        data.out_addr[i] = elem->out_addr[i];
    }

    for (i = 0; i < elem->in_num; i++) {
        /* Base is overwritten by virtqueue_map when loading.  Do not
         * save it, as it would leak the QEMU address space layout.  */
        data.in_sg[i].iov_len = elem->in_sg[i].iov_len;
    }

    for (i = 0; i < elem->out_num; i++) {
        /* Do not save iov_base as above.  */
        data.out_sg[i].iov_len = elem->out_sg[i].iov_len;
    }
    qemu_put_buffer(f, (uint8_t *)&data, sizeof(VirtQueueElementOld));
}

A
aliguori 已提交
744
/* virtio device */
745 746
/*
 * Forward a notification for @vector to the notify hook of the virtio bus
 * class that @vdev sits on, if that class provides one.
 */
static void virtio_notify_vector(VirtIODevice *vdev, uint16_t vector)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);

    if (k->notify) {
        k->notify(qbus->parent, vector);
    }
}
A
aliguori 已提交
754

P
Paul Brook 已提交
755
/* Notify the transport using VIRTIO_NO_VECTOR as the vector. */
void virtio_update_irq(VirtIODevice *vdev)
{
    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);
}

760 761 762 763 764 765 766 767 768 769 770 771
/*
 * Run the device class's validate_features hook and return its result;
 * a class without the hook validates trivially (returns 0).
 */
static int virtio_validate_features(VirtIODevice *vdev)
{
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);

    if (!vdc->validate_features) {
        return 0;
    }
    return vdc->validate_features(vdev);
}

/*
 * Set the device status to @val.
 *
 * For VIRTIO_F_VERSION_1 devices, a transition that newly sets
 * VIRTIO_CONFIG_S_FEATURES_OK first runs feature validation; if that
 * fails its error code is returned and the status is left unchanged.
 * Otherwise the class set_status hook (if any) is called, the status is
 * stored, and 0 is returned.
 */
int virtio_set_status(VirtIODevice *vdev, uint8_t val)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    trace_virtio_set_status(vdev, val);

    if (virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
        if (!(vdev->status & VIRTIO_CONFIG_S_FEATURES_OK) &&
            val & VIRTIO_CONFIG_S_FEATURES_OK) {
            int ret = virtio_validate_features(vdev);

            if (ret) {
                return ret;
            }
        }
    }
    if (k->set_status) {
        k->set_status(vdev, val);
    }
    vdev->status = val;
    return 0;
}

793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813
bool target_words_bigendian(void);

/* Pick the device endianness from target_words_bigendian(). */
static enum virtio_device_endian virtio_default_endian(void)
{
    return target_words_bigendian() ? VIRTIO_DEVICE_ENDIAN_BIG
                                    : VIRTIO_DEVICE_ENDIAN_LITTLE;
}

/*
 * Pick the device endianness by asking current_cpu's class hook
 * virtio_is_big_endian().
 */
static enum virtio_device_endian virtio_current_cpu_endian(void)
{
    CPUClass *cpu_class = CPU_GET_CLASS(current_cpu);

    return cpu_class->virtio_is_big_endian(current_cpu)
           ? VIRTIO_DEVICE_ENDIAN_BIG
           : VIRTIO_DEVICE_ENDIAN_LITTLE;
}

P
Paul Brook 已提交
814
void virtio_reset(void *opaque)
A
aliguori 已提交
815 816
{
    VirtIODevice *vdev = opaque;
817
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
A
aliguori 已提交
818 819
    int i;

820
    virtio_set_status(vdev, 0);
821 822 823 824 825 826 827
    if (current_cpu) {
        /* Guest initiated reset */
        vdev->device_endian = virtio_current_cpu_endian();
    } else {
        /* System reset */
        vdev->device_endian = virtio_default_endian();
    }
828

829 830 831
    if (k->reset) {
        k->reset(vdev);
    }
A
aliguori 已提交
832

833
    vdev->guest_features = 0;
A
aliguori 已提交
834 835 836
    vdev->queue_sel = 0;
    vdev->status = 0;
    vdev->isr = 0;
837 838
    vdev->config_vector = VIRTIO_NO_VECTOR;
    virtio_notify_vector(vdev, vdev->config_vector);
A
aliguori 已提交
839

840
    for(i = 0; i < VIRTIO_QUEUE_MAX; i++) {
A
aliguori 已提交
841 842 843 844
        vdev->vq[i].vring.desc = 0;
        vdev->vq[i].vring.avail = 0;
        vdev->vq[i].vring.used = 0;
        vdev->vq[i].last_avail_idx = 0;
845
        vdev->vq[i].shadow_avail_idx = 0;
846
        vdev->vq[i].used_idx = 0;
847
        virtio_queue_set_vector(vdev, i, VIRTIO_NO_VECTOR);
M
Michael S. Tsirkin 已提交
848 849 850
        vdev->vq[i].signalled_used = 0;
        vdev->vq[i].signalled_used_valid = false;
        vdev->vq[i].notification = true;
C
Cornelia Huck 已提交
851
        vdev->vq[i].vring.num = vdev->vq[i].vring.num_default;
852
        vdev->vq[i].inuse = 0;
A
aliguori 已提交
853 854 855
    }
}

P
Paul Brook 已提交
856
/*
 * Read one byte of device config space at offset @addr.
 * Returns (uint32_t)-1 when the access would fall outside config_len;
 * otherwise the config is refreshed through the class get_config hook and
 * the byte is returned.
 */
uint32_t virtio_config_readb(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val;

    /* Add in 64 bits so the bounds check cannot wrap where size_t is
     * only 32 bits wide. */
    if ((uint64_t)addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldub_p(vdev->config + addr);
    return val;
}

P
Paul Brook 已提交
871
/*
 * Read a 16-bit value from device config space at offset @addr.
 * Returns (uint32_t)-1 when the access would fall outside config_len;
 * otherwise the config is refreshed through the class get_config hook and
 * the value is loaded with lduw_p().
 */
uint32_t virtio_config_readw(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val;

    /* Add in 64 bits so the bounds check cannot wrap where size_t is
     * only 32 bits wide. */
    if ((uint64_t)addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = lduw_p(vdev->config + addr);
    return val;
}

P
Paul Brook 已提交
886
/*
 * Read a 32-bit value from device config space at offset @addr.
 * Returns (uint32_t)-1 when the access would fall outside config_len;
 * otherwise the config is refreshed through the class get_config hook and
 * the value is loaded with ldl_p().
 */
uint32_t virtio_config_readl(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val;

    /* Add in 64 bits so the bounds check cannot wrap where size_t is
     * only 32 bits wide. */
    if ((uint64_t)addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldl_p(vdev->config + addr);
    return val;
}

P
Paul Brook 已提交
901
/*
 * Write the low byte of @data to device config space at offset @addr, then
 * hand the updated config to the class set_config hook if there is one.
 * Accesses that would fall outside config_len are silently ignored.
 */
void virtio_config_writeb(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val = data;

    /* Add in 64 bits so the bounds check cannot wrap where size_t is
     * only 32 bits wide. */
    if ((uint64_t)addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stb_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

P
Paul Brook 已提交
917
/*
 * Store the low 16 bits of @data into the device configuration space at
 * offset @addr with stw_p(), then pass the updated buffer to the class's
 * set_config hook when one is provided.
 *
 * Accesses that do not fit inside config_len are silently ignored.
 */
void virtio_config_writew(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val = data;

    /* Widen before adding so addr + size cannot wrap on 32-bit hosts. */
    if ((uint64_t)addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stw_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

P
Paul Brook 已提交
933
/*
 * Store @data as a 32-bit value into the device configuration space at
 * offset @addr with stl_p(), then pass the updated buffer to the class's
 * set_config hook when one is provided.
 *
 * Accesses that do not fit inside config_len are silently ignored.
 */
void virtio_config_writel(VirtIODevice *vdev, uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val = data;

    /* Widen before adding so addr + size cannot wrap on 32-bit hosts. */
    if ((uint64_t)addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stl_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044
/*
 * "Modern" variant of the one-byte configuration space read.
 *
 * The class's get_config hook is invoked on vdev->config before the value
 * is fetched with ldub_p().  Returns the byte zero-extended to 32 bits, or
 * (uint32_t)-1 when the access does not fit inside config_len.
 */
uint32_t virtio_config_modern_readb(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val;

    /* Widen before adding so addr + size cannot wrap on 32-bit hosts. */
    if ((uint64_t)addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldub_p(vdev->config + addr);
    return val;
}

/*
 * "Modern" variant of the 16-bit configuration space read.
 *
 * The class's get_config hook is invoked on vdev->config before the value
 * is fetched with the little-endian accessor lduw_le_p().  Returns the
 * value zero-extended to 32 bits, or (uint32_t)-1 when the access does
 * not fit inside config_len.
 */
uint32_t virtio_config_modern_readw(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val;

    /* Widen before adding so addr + size cannot wrap on 32-bit hosts. */
    if ((uint64_t)addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = lduw_le_p(vdev->config + addr);
    return val;
}

/*
 * "Modern" variant of the 32-bit configuration space read.
 *
 * The class's get_config hook is invoked on vdev->config before the value
 * is fetched with the little-endian accessor ldl_le_p().  Returns the
 * value, or (uint32_t)-1 when the access does not fit inside config_len.
 */
uint32_t virtio_config_modern_readl(VirtIODevice *vdev, uint32_t addr)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val;

    /* Widen before adding so addr + size cannot wrap on 32-bit hosts. */
    if ((uint64_t)addr + sizeof(val) > vdev->config_len) {
        return (uint32_t)-1;
    }

    k->get_config(vdev, vdev->config);

    val = ldl_le_p(vdev->config + addr);
    return val;
}

/*
 * "Modern" variant of the one-byte configuration space write.
 *
 * Stores the low byte of @data at offset @addr with stb_p(), then passes
 * the updated buffer to the class's set_config hook when one is provided.
 * Accesses that do not fit inside config_len are silently ignored.
 */
void virtio_config_modern_writeb(VirtIODevice *vdev,
                                 uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint8_t val = data;

    /* Widen before adding so addr + size cannot wrap on 32-bit hosts. */
    if ((uint64_t)addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stb_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

/*
 * "Modern" variant of the 16-bit configuration space write.
 *
 * Stores the low 16 bits of @data at offset @addr with the little-endian
 * accessor stw_le_p(), then passes the updated buffer to the class's
 * set_config hook when one is provided.  Accesses that do not fit inside
 * config_len are silently ignored.
 */
void virtio_config_modern_writew(VirtIODevice *vdev,
                                 uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint16_t val = data;

    /* Widen before adding so addr + size cannot wrap on 32-bit hosts. */
    if ((uint64_t)addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stw_le_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

/*
 * "Modern" variant of the 32-bit configuration space write.
 *
 * Stores @data at offset @addr with the little-endian accessor
 * stl_le_p(), then passes the updated buffer to the class's set_config
 * hook when one is provided.  Accesses that do not fit inside config_len
 * are silently ignored.
 */
void virtio_config_modern_writel(VirtIODevice *vdev,
                                 uint32_t addr, uint32_t data)
{
    VirtioDeviceClass *k = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t val = data;

    /* Widen before adding so addr + size cannot wrap on 32-bit hosts. */
    if ((uint64_t)addr + sizeof(val) > vdev->config_len) {
        return;
    }

    stl_le_p(vdev->config + addr, val);

    if (k->set_config) {
        k->set_config(vdev, vdev->config);
    }
}

A
Avi Kivity 已提交
1045
/*
 * Record @addr as the descriptor address of queue @n, then call
 * virtio_queue_update_rings() for that queue.
 */
void virtio_queue_set_addr(VirtIODevice *vdev, int n, hwaddr addr)
{
    VirtQueue *queue = &vdev->vq[n];

    queue->vring.desc = addr;
    virtio_queue_update_rings(vdev, n);
}

A
Avi Kivity 已提交
1051
/* Return the descriptor address currently stored for queue @n. */
hwaddr virtio_queue_get_addr(VirtIODevice *vdev, int n)
{
    const VirtQueue *queue = &vdev->vq[n];

    return queue->vring.desc;
}

/*
 * Set the descriptor, available and used ring addresses of queue @n
 * directly from the three values supplied by the caller.
 */
void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc,
                            hwaddr avail, hwaddr used)
{
    VRing *ring = &vdev->vq[n].vring;

    ring->desc = desc;
    ring->avail = avail;
    ring->used = used;
}

1064 1065
/*
 * Change the size of queue @n to @num entries.
 *
 * The request is ignored when it would switch the queue between the
 * existent (non-zero size) and nonexistent (zero size) states, or when
 * @num is negative or larger than VIRTQUEUE_MAX_SIZE.
 */
void virtio_queue_set_num(VirtIODevice *vdev, int n, int num)
{
    VRing *ring = &vdev->vq[n].vring;
    bool wants_queue = num != 0;
    bool has_queue = ring->num != 0;

    if (wants_queue != has_queue) {
        return;
    }
    if (num > VIRTQUEUE_MAX_SIZE || num < 0) {
        return;
    }

    ring->num = num;
}

1077 1078 1079 1080 1081 1082 1083 1084 1085 1086
/* Return the head of the queue list kept for interrupt vector @vector. */
VirtQueue *virtio_vector_first_queue(VirtIODevice *vdev, uint16_t vector)
{
    VirtQueue *first = QLIST_FIRST(&vdev->vector_queues[vector]);

    return first;
}

/* Return the list element that follows @vq through its 'node' link. */
VirtQueue *virtio_vector_next_queue(VirtQueue *vq)
{
    VirtQueue *following = QLIST_NEXT(vq, node);

    return following;
}

P
Paul Brook 已提交
1087 1088 1089 1090
/* Return the number of entries configured for queue @n (0 if unused). */
int virtio_queue_get_num(VirtIODevice *vdev, int n)
{
    const VRing *ring = &vdev->vq[n].vring;

    return ring->num;
}
A
aliguori 已提交
1091

1092 1093 1094 1095
/*
 * Count the leading queues of @vdev that have a non-zero size, stopping at
 * the first empty slot or at VIRTIO_QUEUE_MAX.
 */
int virtio_get_num_queues(VirtIODevice *vdev)
{
    int count = 0;

    while (count < VIRTIO_QUEUE_MAX && virtio_queue_get_num(vdev, count)) {
        count++;
    }

    return count;
}

1105 1106 1107 1108 1109
/*
 * Set the vring alignment of queue @n and call
 * virtio_queue_update_rings() for it.
 *
 * When VIRTIO_F_VERSION_1 is set on the device the request is rejected
 * with an error report.  Otherwise the transport must have declared
 * has_variable_vring_alignment; this is enforced with an assertion.
 */
void virtio_queue_set_align(VirtIODevice *vdev, int n, int align)
{
    BusState *bus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *bus_class = VIRTIO_BUS_GET_CLASS(bus);
    bool is_virtio_1 = virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1);

    /* virtio-1 compliant devices cannot change the alignment */
    if (is_virtio_1) {
        error_report("tried to modify queue alignment for virtio-1 device");
        return;
    }

    /*
     * The transport has to announce up front that it changes the
     * alignment: a buggy transport then asserts immediately instead of
     * silently failing to migrate this state.
     */
    assert(bus_class->has_variable_vring_alignment);

    vdev->vq[n].vring.align = align;
    virtio_queue_update_rings(vdev, n);
}

M
Michael S. Tsirkin 已提交
1125 1126 1127 1128 1129 1130 1131 1132 1133 1134
/*
 * Run the AIO output handler of @vq, after emitting the queue-notify
 * trace event.  Does nothing when the queue has no descriptor address or
 * no handle_aio_output callback.
 */
static void virtio_queue_notify_aio_vq(VirtQueue *vq)
{
    VirtIODevice *vdev;

    if (!vq->vring.desc || !vq->handle_aio_output) {
        return;
    }

    vdev = vq->vdev;
    trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
    vq->handle_aio_output(vdev, vq);
}

1135
/*
 * Run the output handler of @vq, after emitting the queue-notify trace
 * event.  Does nothing when the queue has no descriptor address or no
 * handle_output callback.
 */
static void virtio_queue_notify_vq(VirtQueue *vq)
{
    VirtIODevice *vdev;

    if (!vq->vring.desc || !vq->handle_output) {
        return;
    }

    vdev = vq->vdev;
    trace_virtio_queue_notify(vdev, vq - vdev->vq, vq);
    vq->handle_output(vdev, vq);
}

P
Paul Brook 已提交
1145 1146
/* Notify queue number @n of @vdev through virtio_queue_notify_vq(). */
void virtio_queue_notify(VirtIODevice *vdev, int n)
{
    VirtQueue *target = &vdev->vq[n];

    virtio_queue_notify_vq(target);
}

1150 1151
/*
 * Return the vector assigned to queue @n, or VIRTIO_NO_VECTOR when @n is
 * not below VIRTIO_QUEUE_MAX.
 */
uint16_t virtio_queue_vector(VirtIODevice *vdev, int n)
{
    if (n >= VIRTIO_QUEUE_MAX) {
        return VIRTIO_NO_VECTOR;
    }

    return vdev->vq[n].vector;
}

/*
 * Assign @vector to queue @n.
 *
 * When the device keeps per-vector queue lists (vector_queues is set) the
 * queue is first unlinked from the list of its previous vector and then
 * linked at the head of the list for @vector; VIRTIO_NO_VECTOR means "not
 * on any list" in both directions.  Requests with @n not below
 * VIRTIO_QUEUE_MAX are ignored.
 */
void virtio_queue_set_vector(VirtIODevice *vdev, int n, uint16_t vector)
{
    VirtQueue *vq;

    if (n >= VIRTIO_QUEUE_MAX) {
        return;
    }

    /* Only form the element pointer once the index is known to be valid. */
    vq = &vdev->vq[n];

    if (vdev->vector_queues && vq->vector != VIRTIO_NO_VECTOR) {
        QLIST_REMOVE(vq, node);
    }

    vq->vector = vector;

    if (vdev->vector_queues && vector != VIRTIO_NO_VECTOR) {
        QLIST_INSERT_HEAD(&vdev->vector_queues[vector], vq, node);
    }
}

1173 1174 1175
/*
 * Claim the first unused virtqueue slot (vring.num == 0) of @vdev and
 * initialise it: size and default size are set to @queue_size, the
 * alignment to VIRTIO_PCI_VRING_ALIGN, the output handler to
 * @handle_output, the AIO handler to NULL and use_aio to @use_aio.
 *
 * Aborts when every slot is taken or @queue_size lies outside
 * 0..VIRTQUEUE_MAX_SIZE.  Returns a pointer to the initialised queue.
 */
static VirtQueue *virtio_add_queue_internal(VirtIODevice *vdev, int queue_size,
                                            VirtIOHandleOutput handle_output,
                                            bool use_aio)
{
    VirtQueue *vq;
    int i;

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num == 0) {
            break;
        }
    }

    /* A negative size would otherwise be stored as a huge unsigned num. */
    if (i == VIRTIO_QUEUE_MAX ||
        queue_size < 0 || queue_size > VIRTQUEUE_MAX_SIZE) {
        abort();
    }

    vq = &vdev->vq[i];
    vq->vring.num = queue_size;
    vq->vring.num_default = queue_size;
    vq->vring.align = VIRTIO_PCI_VRING_ALIGN;
    vq->handle_output = handle_output;
    vq->handle_aio_output = NULL;
    vq->use_aio = use_aio;

    return vq;
}

1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213
/* Add a virt queue and mark AIO.
 * An AIO queue will use the AioContext based event interface instead of the
 * default IOHandler and EventNotifier interface.
 */
VirtQueue *virtio_add_queue_aio(VirtIODevice *vdev, int queue_size,
                                VirtIOHandleOutput handle_output)
{
    /* Same as virtio_add_queue(), but with the use_aio flag set. */
    VirtQueue *created = virtio_add_queue_internal(vdev, queue_size,
                                                   handle_output, true);

    return created;
}

/* Add a normal virt queue (as opposed to the AIO version above). */
VirtQueue *virtio_add_queue(VirtIODevice *vdev, int queue_size,
                            VirtIOHandleOutput handle_output)
{
    /* Same as virtio_add_queue_aio(), but with the use_aio flag clear. */
    VirtQueue *created = virtio_add_queue_internal(vdev, queue_size,
                                                   handle_output, false);

    return created;
}

1214 1215
/*
 * Mark queue @n as unused by zeroing both its current and its default
 * size.  Aborts when @n is outside 0..VIRTIO_QUEUE_MAX-1.
 */
void virtio_del_queue(VirtIODevice *vdev, int n)
{
    VRing *ring;

    if (!(n >= 0 && n < VIRTIO_QUEUE_MAX)) {
        abort();
    }

    ring = &vdev->vq[n].vring;
    ring->num = 0;
    ring->num_default = 0;
}

1224 1225
/*
 * Raise an interrupt for @vq: emit the trace event, set bit 0x01 in the
 * owning device's ISR and notify the queue's vector.
 */
void virtio_irq(VirtQueue *vq)
{
    VirtIODevice *owner;

    trace_virtio_irq(vq);

    owner = vq->vdev;
    owner->isr |= 0x01;
    virtio_notify_vector(owner, vq->vector);
}

1231
/*
 * Decide whether the guest has to be notified about @vq.
 *
 * Updates vq->signalled_used and vq->signalled_used_valid as a side
 * effect whenever the VIRTIO_RING_F_EVENT_IDX path is taken.
 */
bool virtio_should_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    uint16_t prev_idx, cur_idx;
    bool was_valid;

    /* We need to expose used array entries before checking used event. */
    smp_mb();

    /* Always notify when queue is empty (when feature acknowledge) */
    if (virtio_vdev_has_feature(vdev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
        !vq->inuse && virtio_queue_empty(vq)) {
        return true;
    }

    /* Without event indexes the avail ring flag alone decides. */
    if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
        return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
    }

    was_valid = vq->signalled_used_valid;
    vq->signalled_used_valid = true;
    prev_idx = vq->signalled_used;
    cur_idx = vq->used_idx;
    vq->signalled_used = cur_idx;

    /* The used event is only read when the previous value was valid. */
    if (!was_valid) {
        return true;
    }
    return vring_need_event(vring_get_used_event(vq), cur_idx, prev_idx);
}

/*
 * Notify the guest about @vq when virtio_should_notify() asks for it:
 * emit the trace event, set bit 0x01 in the ISR and notify the queue's
 * vector.
 */
void virtio_notify(VirtIODevice *vdev, VirtQueue *vq)
{
    if (virtio_should_notify(vdev, vq)) {
        trace_virtio_notify(vdev, vq);
        vdev->isr |= 0x01;
        virtio_notify_vector(vdev, vq->vector);
    }
}

/*
 * Signal a configuration change: set bits 0x03 in the ISR, bump the
 * generation counter and notify the config vector.  Nothing happens
 * unless VIRTIO_CONFIG_S_DRIVER_OK is set in the device status.
 */
void virtio_notify_config(VirtIODevice *vdev)
{
    if (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK) {
        vdev->isr |= 0x03;
        vdev->generation++;
        virtio_notify_vector(vdev, vdev->config_vector);
    }
}

1275 1276 1277 1278 1279
static bool virtio_device_endian_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;

    assert(vdev->device_endian != VIRTIO_DEVICE_ENDIAN_UNKNOWN);
1280
    if (!virtio_vdev_has_feature(vdev, VIRTIO_F_VERSION_1)) {
1281 1282 1283 1284
        return vdev->device_endian != virtio_default_endian();
    }
    /* Devices conforming to VIRTIO 1.0 or later are always LE. */
    return vdev->device_endian != VIRTIO_DEVICE_ENDIAN_LITTLE;
1285 1286
}

G
Gerd Hoffmann 已提交
1287 1288 1289 1290 1291 1292 1293
/*
 * .needed callback of the "virtio/64bit_features" subsection: true when
 * any host feature bit above bit 31 is set.
 */
static bool virtio_64bit_features_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;
    uint64_t high_bits = vdev->host_features >> 32;

    return high_bits != 0;
}

J
Jason Wang 已提交
1294 1295 1296 1297 1298 1299 1300
/*
 * .needed callback of the "virtio/virtqueues" subsection: true when the
 * host offers VIRTIO_F_VERSION_1.
 */
static bool virtio_virtqueue_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;
    bool offers_v1 = virtio_host_has_feature(vdev, VIRTIO_F_VERSION_1);

    return offers_v1;
}

C
Cornelia Huck 已提交
1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313
static bool virtio_ringsize_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;
    int i;

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        if (vdev->vq[i].vring.num != vdev->vq[i].vring.num_default) {
            return true;
        }
    }
    return false;
}

1314 1315 1316 1317 1318 1319 1320 1321 1322 1323
/*
 * .needed callback of the "virtio/extra_state" subsection: true when the
 * bus class provides has_extra_state and that hook reports extra state
 * for the bus parent.
 */
static bool virtio_extra_state_needed(void *opaque)
{
    VirtIODevice *vdev = opaque;
    BusState *bus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *bus_class = VIRTIO_BUS_GET_CLASS(bus);

    if (!bus_class->has_extra_state) {
        return false;
    }

    return bus_class->has_extra_state(bus->parent);
}

1324
static const VMStateDescription vmstate_virtqueue = {
J
Jason Wang 已提交
1325
    .name = "virtqueue_state",
1326 1327 1328 1329 1330 1331 1332
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT64(vring.avail, struct VirtQueue),
        VMSTATE_UINT64(vring.used, struct VirtQueue),
        VMSTATE_END_OF_LIST()
    }
J
Jason Wang 已提交
1333 1334 1335 1336 1337 1338 1339 1340
};

/*
 * Subsection that stores vmstate_virtqueue for all VIRTIO_QUEUE_MAX
 * queues; emitted only when virtio_virtqueue_needed() returns true.
 */
static const VMStateDescription vmstate_virtio_virtqueues = {
    .name = "virtio/virtqueues",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_virtqueue_needed,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, VirtIODevice,
                      VIRTIO_QUEUE_MAX, 0, vmstate_virtqueue, VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

1347
static const VMStateDescription vmstate_ringsize = {
C
Cornelia Huck 已提交
1348
    .name = "ringsize_state",
1349 1350 1351 1352 1353 1354
    .version_id = 1,
    .minimum_version_id = 1,
    .fields = (VMStateField[]) {
        VMSTATE_UINT32(vring.num_default, struct VirtQueue),
        VMSTATE_END_OF_LIST()
    }
C
Cornelia Huck 已提交
1355 1356 1357 1358 1359 1360 1361 1362
};

/*
 * Subsection that stores vmstate_ringsize for all VIRTIO_QUEUE_MAX
 * queues; emitted only when virtio_ringsize_needed() returns true.
 */
static const VMStateDescription vmstate_virtio_ringsize = {
    .name = "virtio/ringsize",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_ringsize_needed,
    .fields = (VMStateField[]) {
        VMSTATE_STRUCT_VARRAY_POINTER_KNOWN(vq, VirtIODevice,
                      VIRTIO_QUEUE_MAX, 0, vmstate_ringsize, VirtQueue),
        VMSTATE_END_OF_LIST()
    }
};

1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415
/*
 * VMStateInfo .get hook for the extra-state field: forwards to the bus
 * class's load_extra_state, or returns -1 when the bus has none.
 * @size is unused.
 */
static int get_extra_state(QEMUFile *f, void *pv, size_t size)
{
    VirtIODevice *vdev = pv;
    BusState *bus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *bus_class = VIRTIO_BUS_GET_CLASS(bus);

    if (bus_class->load_extra_state) {
        return bus_class->load_extra_state(bus->parent, f);
    }

    return -1;
}

/*
 * VMStateInfo .put hook for the extra-state field: forwards to the bus
 * class's save_extra_state, which is called unconditionally.
 * @size is unused.
 */
static void put_extra_state(QEMUFile *f, void *pv, size_t size)
{
    VirtIODevice *vdev = pv;
    BusState *bus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *bus_class = VIRTIO_BUS_GET_CLASS(bus);

    bus_class->save_extra_state(bus->parent, f);
}

/* Load/save hooks that delegate the extra state to the virtio bus class. */
static const VMStateInfo vmstate_info_extra_state = {
    .name = "virtqueue_extra_state",
    .put = put_extra_state,
    .get = get_extra_state,
};

/*
 * Subsection holding a single field whose contents are produced and
 * consumed by vmstate_info_extra_state; emitted only when
 * virtio_extra_state_needed() returns true.
 */
static const VMStateDescription vmstate_virtio_extra_state = {
    .name = "virtio/extra_state",
    .version_id = 1,
    .minimum_version_id = 1,
    .needed = &virtio_extra_state_needed,
    .fields = (VMStateField[]) {
        {
            /* Size and offset are zero; the info hooks do the work. */
            .name         = "extra_state",
            .version_id   = 0,
            .field_exists = NULL,
            .size         = 0,
            .info         = &vmstate_info_extra_state,
            .flags        = VMS_SINGLE,
            .offset       = 0,
        },
        VMSTATE_END_OF_LIST()
    }
};

1416 1417 1418 1419
static const VMStateDescription vmstate_virtio_device_endian = {
    .name = "virtio/device_endian",
    .version_id = 1,
    .minimum_version_id = 1,
1420
    .needed = &virtio_device_endian_needed,
1421 1422 1423 1424 1425 1426
    .fields = (VMStateField[]) {
        VMSTATE_UINT8(device_endian, VirtIODevice),
        VMSTATE_END_OF_LIST()
    }
};

G
Gerd Hoffmann 已提交
1427 1428 1429 1430
static const VMStateDescription vmstate_virtio_64bit_features = {
    .name = "virtio/64bit_features",
    .version_id = 1,
    .minimum_version_id = 1,
1431
    .needed = &virtio_64bit_features_needed,
G
Gerd Hoffmann 已提交
1432 1433 1434 1435 1436 1437
    .fields = (VMStateField[]) {
        VMSTATE_UINT64(guest_features, VirtIODevice),
        VMSTATE_END_OF_LIST()
    }
};

1438 1439 1440 1441 1442 1443 1444
/*
 * Top-level description used by virtio_save()/virtio_load() for the
 * subsections only: the field list itself is empty.
 */
static const VMStateDescription vmstate_virtio = {
    .name = "virtio",
    .version_id = 1,
    .minimum_version_id = 1,
    .minimum_version_id_old = 1,
    .fields = (VMStateField[]) {
        VMSTATE_END_OF_LIST()
    },
    .subsections = (const VMStateDescription*[]) {
        &vmstate_virtio_device_endian,
        &vmstate_virtio_64bit_features,
        &vmstate_virtio_virtqueues,
        &vmstate_virtio_ringsize,
        &vmstate_virtio_extra_state,
        NULL
    }
};

A
aliguori 已提交
1456 1457
/*
 * Write the state of @vdev to the migration stream @f.
 *
 * Order on the wire: transport config (if the bus has save_config),
 * status, isr, queue_sel, low 32 bits of guest_features, config length
 * and bytes, number of queues in use, the per-queue state, the device
 * class's own save hook (if any) and finally the vmstate_virtio
 * subsections.
 */
void virtio_save(VirtIODevice *vdev, QEMUFile *f)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint32_t guest_features_lo = (vdev->guest_features & 0xffffffff);
    int i;

    if (k->save_config) {
        k->save_config(qbus->parent, f);
    }

    qemu_put_8s(f, &vdev->status);
    qemu_put_8s(f, &vdev->isr);
    qemu_put_be16s(f, &vdev->queue_sel);
    qemu_put_be32s(f, &guest_features_lo);
    qemu_put_be32(f, vdev->config_len);
    qemu_put_buffer(f, vdev->config, vdev->config_len);

    /* Queues in use are the leading ones with a non-zero size. */
    i = 0;
    while (i < VIRTIO_QUEUE_MAX && vdev->vq[i].vring.num != 0) {
        i++;
    }

    qemu_put_be32(f, i);

    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        VirtQueue *vq = &vdev->vq[i];

        if (vq->vring.num == 0) {
            break;
        }

        qemu_put_be32(f, vq->vring.num);
        if (k->has_variable_vring_alignment) {
            qemu_put_be32(f, vq->vring.align);
        }
        /* XXX virtio-1 devices */
        qemu_put_be64(f, vq->vring.desc);
        qemu_put_be16s(f, &vq->last_avail_idx);
        if (k->save_queue) {
            k->save_queue(qbus->parent, i, f);
        }
    }

    if (vdc->save != NULL) {
        vdc->save(vdev, f);
    }

    /* Subsections */
    vmstate_save_state(f, &vmstate_virtio, vdev, NULL);
}

1506 1507 1508 1509 1510 1511
/* A wrapper for use as a VMState .put function */
void virtio_vmstate_save(QEMUFile *f, void *opaque, size_t size)
{
    /* @size is ignored; the whole device is saved. */
    VirtIODevice *vdev = VIRTIO_DEVICE(opaque);

    virtio_save(vdev, f);
}

1512
/*
 * Apply the guest feature set @val without checking the device status.
 *
 * Bits not present in host_features are dropped; the remaining bits are
 * passed to the class's set_features hook (if any) and stored in
 * guest_features.  Returns -1 when any bit had to be dropped, else 0.
 */
static int virtio_set_features_nocheck(VirtIODevice *vdev, uint64_t val)
{
    VirtioDeviceClass *dev_class = VIRTIO_DEVICE_GET_CLASS(vdev);
    uint64_t rejected = val & ~(vdev->host_features);
    uint64_t accepted = val & vdev->host_features;

    if (dev_class->set_features) {
        dev_class->set_features(vdev, accepted);
    }
    vdev->guest_features = accepted;

    if (rejected != 0) {
        return -1;
    }
    return 0;
}

1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536
/*
 * Set the guest features of @vdev to @val.
 *
 * Returns -EINVAL when VIRTIO_CONFIG_S_FEATURES_OK is already set in the
 * status, otherwise the result of virtio_set_features_nocheck().
 */
int virtio_set_features(VirtIODevice *vdev, uint64_t val)
{
    bool negotiation_done = vdev->status & VIRTIO_CONFIG_S_FEATURES_OK;

    /*
     * The driver must not attempt to set features after feature
     * negotiation has finished.
     */
    if (negotiation_done) {
        return -EINVAL;
    }

    return virtio_set_features_nocheck(vdev, val);
}

1537
/*
 * Restore the state of @vdev from the migration stream @f, in the same
 * order virtio_save() wrote it, and sanity-check the result.
 *
 * Returns 0 on success.  A non-zero value from one of the transport or
 * device load hooks, or from loading the subsections, is passed through;
 * -1 is returned for inconsistent stream contents.
 */
int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id)
{
    int i, ret;
    int32_t config_len;
    uint32_t num;
    uint32_t features;
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev);

    /*
     * We poison the endianness to ensure it does not get used before
     * subsections have been loaded.
     */
    vdev->device_endian = VIRTIO_DEVICE_ENDIAN_UNKNOWN;

    if (k->load_config) {
        ret = k->load_config(qbus->parent, f);
        if (ret) {
            return ret;
        }
    }

    qemu_get_8s(f, &vdev->status);
    qemu_get_8s(f, &vdev->isr);
    qemu_get_be16s(f, &vdev->queue_sel);
    if (vdev->queue_sel >= VIRTIO_QUEUE_MAX) {
        return -1;
    }
    qemu_get_be32s(f, &features);

    /*
     * Temporarily set guest_features low bits - needed by
     * virtio net load code testing for VIRTIO_NET_F_CTRL_GUEST_OFFLOADS
     * VIRTIO_NET_F_GUEST_ANNOUNCE and VIRTIO_NET_F_CTRL_VQ.
     *
     * Note: devices should always test host features in future - don't create
     * new dependencies like this.
     */
    vdev->guest_features = features;

    config_len = qemu_get_be32(f);

    /*
     * There are cases where the incoming config can be bigger or smaller
     * than what we have; so load what we have space for, and skip
     * any excess that's in the stream.
     */
    qemu_get_buffer(f, vdev->config, MIN(config_len, vdev->config_len));

    while (config_len > vdev->config_len) {
        qemu_get_byte(f);
        config_len--;
    }

    num = qemu_get_be32(f);

    if (num > VIRTIO_QUEUE_MAX) {
        error_report("Invalid number of virtqueues: 0x%x", num);
        return -1;
    }

    /* First pass: read the per-queue state straight from the stream. */
    for (i = 0; i < num; i++) {
        VirtQueue *vq = &vdev->vq[i];

        vq->vring.num = qemu_get_be32(f);
        if (k->has_variable_vring_alignment) {
            vq->vring.align = qemu_get_be32(f);
        }
        vq->vring.desc = qemu_get_be64(f);
        qemu_get_be16s(f, &vq->last_avail_idx);
        vq->signalled_used_valid = false;
        vq->notification = true;

        if (vq->vring.desc) {
            /* XXX virtio-1 devices */
            virtio_queue_update_rings(vdev, i);
        } else if (vq->last_avail_idx) {
            error_report("VQ %d address 0x0 "
                         "inconsistent with Host index 0x%x",
                         i, vq->last_avail_idx);
            return -1;
        }
        if (k->load_queue) {
            ret = k->load_queue(qbus->parent, i, f);
            if (ret) {
                return ret;
            }
        }
    }

    virtio_notify_vector(vdev, VIRTIO_NO_VECTOR);

    if (vdc->load != NULL) {
        ret = vdc->load(vdev, f, version_id);
        if (ret) {
            return ret;
        }
    }

    /* Subsections */
    ret = vmstate_load_state(f, &vmstate_virtio, vdev, 1);
    if (ret) {
        return ret;
    }

    if (vdev->device_endian == VIRTIO_DEVICE_ENDIAN_UNKNOWN) {
        vdev->device_endian = virtio_default_endian();
    }

    if (virtio_64bit_features_needed(vdev)) {
        /*
         * Subsection load filled vdev->guest_features.  Run them
         * through virtio_set_features to sanity-check them against
         * host_features.
         */
        uint64_t features64 = vdev->guest_features;

        if (virtio_set_features_nocheck(vdev, features64) < 0) {
            error_report("Features 0x%" PRIx64 " unsupported. "
                         "Allowed features: 0x%" PRIx64,
                         features64, vdev->host_features);
            return -1;
        }
    } else {
        if (virtio_set_features_nocheck(vdev, features) < 0) {
            error_report("Features 0x%x unsupported. "
                         "Allowed features: 0x%" PRIx64,
                         features, vdev->host_features);
            return -1;
        }
    }

    /* Second pass: cross-check the indexes of every queue with a ring. */
    for (i = 0; i < num; i++) {
        VirtQueue *vq = &vdev->vq[i];
        uint16_t nheads;

        if (!vq->vring.desc) {
            continue;
        }

        nheads = vring_avail_idx(vq) - vq->last_avail_idx;
        /* Check it isn't doing strange things with descriptor numbers. */
        if (nheads > vq->vring.num) {
            error_report("VQ %d size 0x%x Guest index 0x%x "
                         "inconsistent with Host index 0x%x: delta 0x%x",
                         i, vq->vring.num,
                         vring_avail_idx(vq),
                         vq->last_avail_idx, nheads);
            return -1;
        }
        vq->used_idx = vring_used_idx(vq);
        vq->shadow_avail_idx = vring_avail_idx(vq);

        /*
         * Some devices migrate VirtQueueElements that have been popped
         * from the avail ring but not yet returned to the used ring.
         */
        vq->inuse = vq->last_avail_idx - vq->used_idx;
        if (vq->inuse > vq->vring.num) {
            error_report("VQ %d size 0x%x < last_avail_idx 0x%x - "
                         "used_idx 0x%x",
                         i, vq->vring.num,
                         vq->last_avail_idx,
                         vq->used_idx);
            return -1;
        }
    }

    return 0;
}

1701
/*
 * Undo virtio_init(): remove the VM change state handler and free the
 * vector queue lists, the queue array and the config buffer.
 */
void virtio_cleanup(VirtIODevice *vdev)
{
    qemu_del_vm_change_state_handler(vdev->vmstate);

    g_free(vdev->vector_queues);
    g_free(vdev->vq);
    g_free(vdev->config);
}

1709
/*
 * VM change state handler registered by virtio_init().
 *
 * The backend counts as running when the VM runs and the driver has set
 * DRIVER_OK.  The device status is re-applied before the transport's
 * vmstate_change hook when the backend starts, and after it when the
 * backend stops.
 */
static void virtio_vmstate_change(void *opaque, int running, RunState state)
{
    VirtIODevice *vdev = opaque;
    BusState *bus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *bus_class = VIRTIO_BUS_GET_CLASS(bus);
    bool backend_run = running && (vdev->status & VIRTIO_CONFIG_S_DRIVER_OK);

    vdev->vm_running = running;

    if (backend_run) {
        virtio_set_status(vdev, vdev->status);
    }

    if (bus_class->vmstate_change) {
        bus_class->vmstate_change(bus->parent, backend_run);
    }

    if (!backend_run) {
        virtio_set_status(vdev, vdev->status);
    }
}

1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740
/*
 * Initialise the object stored at @data as a @vdev_name instance of
 * @vdev_size bytes, attach it to @proxy_obj as the "virtio-backend"
 * child, drop the local reference and alias all of its properties onto
 * @proxy_obj.
 */
void virtio_instance_init_common(Object *proxy_obj, void *data,
                                 size_t vdev_size, const char *vdev_name)
{
    DeviceState *backend = data;
    Object *backend_obj;

    object_initialize(backend, vdev_size, vdev_name);

    backend_obj = OBJECT(backend);
    object_property_add_child(proxy_obj, "virtio-backend", backend_obj, NULL);
    object_unref(backend_obj);

    qdev_alias_all_properties(backend, proxy_obj);
}

1741 1742
/*
 * Initialise the common part of a virtio device.
 *
 * Allocates the per-vector queue lists (when the transport reports a
 * non-zero number of vectors), the array of VIRTIO_QUEUE_MAX queues and a
 * zeroed config buffer of @config_size bytes (NULL when the size is 0),
 * resets the basic registers and registers virtio_vmstate_change() as VM
 * change state handler.
 */
void virtio_init(VirtIODevice *vdev, const char *name,
                 uint16_t device_id, size_t config_size)
{
    BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
    VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
    int i;
    int nvectors = 0;

    if (k->query_nvectors) {
        nvectors = k->query_nvectors(qbus->parent);
    }

    if (nvectors) {
        vdev->vector_queues =
            g_malloc0(sizeof(*vdev->vector_queues) * nvectors);
    }

    vdev->device_id = device_id;
    vdev->status = 0;
    vdev->isr = 0;
    vdev->queue_sel = 0;
    vdev->config_vector = VIRTIO_NO_VECTOR;
    vdev->vq = g_malloc0(sizeof(VirtQueue) * VIRTIO_QUEUE_MAX);
    vdev->vm_running = runstate_is_running();

    /* Every queue starts without a vector and knows its own index. */
    for (i = 0; i < VIRTIO_QUEUE_MAX; i++) {
        VirtQueue *vq = &vdev->vq[i];

        vq->vector = VIRTIO_NO_VECTOR;
        vq->vdev = vdev;
        vq->queue_index = i;
    }

    vdev->name = name;
    vdev->config_len = config_size;
    vdev->config = vdev->config_len ? g_malloc0(config_size) : NULL;

    vdev->vmstate = qemu_add_vm_change_state_handler(virtio_vmstate_change,
                                                     vdev);
    vdev->device_endian = virtio_default_endian();
    vdev->use_guest_notifier_mask = true;
}
A
aliguori 已提交
1779

A
Avi Kivity 已提交
1780
/* Return the descriptor table address of queue @n. */
hwaddr virtio_queue_get_desc_addr(VirtIODevice *vdev, int n)
{
    const VRing *ring = &vdev->vq[n].vring;

    return ring->desc;
}

A
Avi Kivity 已提交
1785
/* Return the available ring address of queue @n. */
hwaddr virtio_queue_get_avail_addr(VirtIODevice *vdev, int n)
{
    const VRing *ring = &vdev->vq[n].vring;

    return ring->avail;
}

A
Avi Kivity 已提交
1790
/* Return the used ring address of queue @n. */
hwaddr virtio_queue_get_used_addr(VirtIODevice *vdev, int n)
{
    const VRing *ring = &vdev->vq[n].vring;

    return ring->used;
}

A
Avi Kivity 已提交
1795
/*
 * Return the start address of the whole ring of queue @n, which is the
 * address stored in vring.desc.
 */
hwaddr virtio_queue_get_ring_addr(VirtIODevice *vdev, int n)
{
    const VRing *ring = &vdev->vq[n].vring;

    return ring->desc;
}

A
Avi Kivity 已提交
1800
/* Return the size in bytes of the descriptor table of queue @n. */
hwaddr virtio_queue_get_desc_size(VirtIODevice *vdev, int n)
{
    const VRing *ring = &vdev->vq[n].vring;

    return sizeof(VRingDesc) * ring->num;
}

A
Avi Kivity 已提交
1805
/*
 * Return the size in bytes of the available ring of queue @n: the
 * VRingAvail header plus one uint16_t per ring entry.
 */
hwaddr virtio_queue_get_avail_size(VirtIODevice *vdev, int n)
{
    size_t entries_bytes = sizeof(uint16_t) * vdev->vq[n].vring.num;

    return offsetof(VRingAvail, ring) + entries_bytes;
}

A
Avi Kivity 已提交
1811
/*
 * Return the size in bytes of the used ring of queue @n: the VRingUsed
 * header plus one VRingUsedElem per ring entry.
 */
hwaddr virtio_queue_get_used_size(VirtIODevice *vdev, int n)
{
    size_t entries_bytes = sizeof(VRingUsedElem) * vdev->vq[n].vring.num;

    return offsetof(VRingUsed, ring) + entries_bytes;
}

A
Avi Kivity 已提交
1817
/*
 * Return the size in bytes of the whole ring of queue @n, measured from
 * the descriptor address to the end of the used ring.
 */
hwaddr virtio_queue_get_ring_size(VirtIODevice *vdev, int n)
{
    const VRing *ring = &vdev->vq[n].vring;
    hwaddr used_offset = ring->used - ring->desc;

    return used_offset + virtio_queue_get_used_size(vdev, n);
}

/* Return the last_avail_idx value stored for queue @n. */
uint16_t virtio_queue_get_last_avail_idx(VirtIODevice *vdev, int n)
{
    const VirtQueue *queue = &vdev->vq[n];

    return queue->last_avail_idx;
}

/*
 * Set last_avail_idx of queue @n to @idx and bring shadow_avail_idx in
 * line with it.
 */
void virtio_queue_set_last_avail_idx(VirtIODevice *vdev, int n, uint16_t idx)
{
    VirtQueue *queue = &vdev->vq[n];

    queue->last_avail_idx = idx;
    queue->shadow_avail_idx = idx;
}

1834 1835 1836 1837 1838
/*
 * Clear the signalled_used_valid flag of queue @n of @vdev.
 * @n is not range-checked.
 */
void virtio_queue_invalidate_signalled_used(VirtIODevice *vdev, int n)
{
    VirtQueue *queue = &vdev->vq[n];

    queue->signalled_used_valid = false;
}

1839 1840 1841 1842 1843
/*
 * Return a pointer to element @n of @vdev's queue array.
 * @n is not range-checked.
 */
VirtQueue *virtio_get_queue(VirtIODevice *vdev, int n)
{
    return &vdev->vq[n];
}

1844 1845 1846 1847 1848
/* Return the queue_index value stored in @vq. */
uint16_t virtio_get_queue_index(VirtQueue *vq)
{
    uint16_t index = vq->queue_index;

    return index;
}

1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860
/*
 * Handler for a queue's guest notifier.
 *
 * @n must be the guest_notifier member embedded in a VirtQueue.  If the
 * notifier was set (test-and-clear reports true), call virtio_irq() on
 * the owning queue; otherwise do nothing.
 */
static void virtio_queue_guest_notifier_read(EventNotifier *n)
{
    if (!event_notifier_test_and_clear(n)) {
        return;
    }

    virtio_irq(container_of(n, VirtQueue, guest_notifier));
}

/*
 * Install or remove the handler on @vq's guest notifier.
 *
 * @assign:     true when the notifier is being set up, false when it is
 *              being torn down.
 * @with_irqfd: when true, no handler is installed even if @assign is set.
 *
 * virtio_queue_guest_notifier_read() is installed only for
 * (@assign && !@with_irqfd); in every other case the handler is cleared.
 */
void virtio_queue_set_guest_notifier_fd_handler(VirtQueue *vq, bool assign,
                                                bool with_irqfd)
{
    EventNotifier *notifier = &vq->guest_notifier;
    void (*handler)(EventNotifier *) = NULL;

    if (assign && !with_irqfd) {
        handler = virtio_queue_guest_notifier_read;
    }
    event_notifier_set_handler(notifier, false, handler);

    if (!assign) {
        /*
         * Test and clear the notifier one last time before it is closed,
         * in case the poll callback did not get a chance to run.
         */
        virtio_queue_guest_notifier_read(notifier);
    }
}

1873 1874 1875 1876
/* Return a pointer to the guest_notifier embedded in @vq. */
EventNotifier *virtio_queue_get_guest_notifier(VirtQueue *vq)
{
    EventNotifier *notifier = &vq->guest_notifier;

    return notifier;
}
1877

M
Michael S. Tsirkin 已提交
1878
/*
 * AIO-path handler for a queue's host notifier.
 *
 * @n must be the host_notifier member embedded in a VirtQueue.  If the
 * notifier was set (test-and-clear reports true), call
 * virtio_queue_notify_aio_vq() on the owning queue; otherwise do nothing.
 */
static void virtio_queue_host_notifier_aio_read(EventNotifier *n)
{
    if (!event_notifier_test_and_clear(n)) {
        return;
    }

    virtio_queue_notify_aio_vq(container_of(n, VirtQueue, host_notifier));
}

1886
void virtio_queue_aio_set_host_notifier_handler(VirtQueue *vq, AioContext *ctx,
1887
                                                VirtIOHandleOutput handle_output)
1888
{
1889 1890
    if (handle_output) {
        vq->handle_aio_output = handle_output;
1891
        aio_set_event_notifier(ctx, &vq->host_notifier, true,
M
Michael S. Tsirkin 已提交
1892
                               virtio_queue_host_notifier_aio_read);
1893 1894 1895 1896
    } else {
        aio_set_event_notifier(ctx, &vq->host_notifier, true, NULL);
        /* Test and clear notifier before after disabling event,
         * in case poll callback didn't have time to run. */
M
Michael S. Tsirkin 已提交
1897
        virtio_queue_host_notifier_aio_read(&vq->host_notifier);
1898
        vq->handle_aio_output = NULL;
M
Michael S. Tsirkin 已提交
1899 1900 1901 1902 1903 1904 1905 1906
    }
}

/*
 * Handler for a queue's host notifier.
 *
 * @n must be the host_notifier member embedded in a VirtQueue.  If the
 * notifier was set (test-and-clear reports true), call
 * virtio_queue_notify_vq() on the owning queue; otherwise do nothing.
 */
static void virtio_queue_host_notifier_read(EventNotifier *n)
{
    if (!event_notifier_test_and_clear(n)) {
        return;
    }

    virtio_queue_notify_vq(container_of(n, VirtQueue, host_notifier));
}

P
Paolo Bonzini 已提交
1910 1911
/*
 * Install or remove the handler on @vq's host notifier.
 *
 * @assign:      true when the notifier is being set up, false when it is
 *               being torn down.
 * @set_handler: when false, no handler is installed even if @assign is set.
 *
 * virtio_queue_host_notifier_read() is installed only for
 * (@assign && @set_handler); in every other case the handler is cleared.
 * The registration goes through aio_set_event_notifier() on the context
 * returned by qemu_get_aio_context() when vq->use_aio is set, and through
 * event_notifier_set_handler() otherwise.
 */
void virtio_queue_set_host_notifier_fd_handler(VirtQueue *vq, bool assign,
                                               bool set_handler)
{
    AioContext *ctx = qemu_get_aio_context();
    EventNotifier *notifier = &vq->host_notifier;
    void (*handler)(EventNotifier *) = NULL;

    if (assign && set_handler) {
        handler = virtio_queue_host_notifier_read;
    }

    if (vq->use_aio) {
        aio_set_event_notifier(ctx, notifier, true, handler);
    } else {
        event_notifier_set_handler(notifier, true, handler);
    }

    if (!assign) {
        /*
         * Test and clear the notifier after disabling the event, in case
         * the poll callback did not get a chance to run.
         */
        virtio_queue_host_notifier_read(notifier);
    }
}

1936 1937 1938 1939
/* Return a pointer to the host_notifier embedded in @vq. */
EventNotifier *virtio_queue_get_host_notifier(VirtQueue *vq)
{
    EventNotifier *notifier = &vq->host_notifier;

    return notifier;
}
1940

1941 1942
/*
 * Replace the bus name stored in @vdev with a private copy of @bus_name.
 *
 * @bus_name is duplicated with g_strdup(), so the caller keeps ownership
 * of its string; a NULL @bus_name leaves vdev->bus_name NULL.  The name
 * previously stored in @vdev is released.
 */
void virtio_device_set_child_bus_name(VirtIODevice *vdev, char *bus_name)
{
    /*
     * Duplicate before freeing: @bus_name may be vdev->bus_name itself
     * (or point into it), in which case freeing first would make
     * g_strdup() read freed memory.
     */
    char *new_name = g_strdup(bus_name);

    g_free(vdev->bus_name);
    vdev->bus_name = new_name;
}

1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958
/*
 * DeviceClass::realize implementation for virtio devices.
 *
 * Runs the device class's own realize hook (when one is set) and then
 * calls virtio_bus_device_plugged() for the device.  The first error
 * reported by either step is propagated to @errp and stops processing.
 */
static void virtio_device_realize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtioDeviceClass *klass = VIRTIO_DEVICE_GET_CLASS(dev);
    Error *local_err = NULL;

    if (klass->realize) {
        klass->realize(dev, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }

    virtio_bus_device_plugged(vdev, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
    }
}

1968
/*
 * DeviceClass::unrealize implementation for virtio devices.
 *
 * Calls virtio_bus_device_unplugged() first, then the device class's
 * own unrealize hook (when one is set), and finally releases the stored
 * bus name.  If the class hook reports an error it is propagated to
 * @errp and the function returns without touching vdev->bus_name.
 */
static void virtio_device_unrealize(DeviceState *dev, Error **errp)
{
    VirtIODevice *vdev = VIRTIO_DEVICE(dev);
    VirtioDeviceClass *klass = VIRTIO_DEVICE_GET_CLASS(dev);
    Error *local_err = NULL;

    virtio_bus_device_unplugged(vdev);

    if (klass->unrealize) {
        klass->unrealize(dev, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return;
        }
    }

    g_free(vdev->bus_name);
    vdev->bus_name = NULL;
}

C
Cornelia Huck 已提交
1988 1989 1990 1991 1992
/*
 * Property table installed as dc->props by virtio_device_class_init().
 * The entries are produced by DEFINE_VIRTIO_COMMON_FEATURES for the
 * host_features field of VirtIODevice (presumably one property per
 * common feature bit -- see the macro definition to confirm), followed
 * by the end-of-list terminator.
 */
static Property virtio_properties[] = {
    DEFINE_VIRTIO_COMMON_FEATURES(VirtIODevice, host_features),
    DEFINE_PROP_END_OF_LIST(),
};

1993 1994 1995 1996
/*
 * Class initializer for TYPE_VIRTIO_DEVICE.
 *
 * Fills in the DeviceClass defaults shared by virtio devices: the bus
 * type they sit on, the common property table, and the realize and
 * unrealize entry points defined in this file.  @data is unused.
 */
static void virtio_device_class_init(ObjectClass *klass, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(klass);

    /* Set the default values here. */
    dc->bus_type = TYPE_VIRTIO_BUS;
    dc->props = virtio_properties;

    dc->realize = virtio_device_realize;
    dc->unrealize = virtio_device_unrealize;
}

/*
 * Type description for TYPE_VIRTIO_DEVICE: an abstract type derived
 * from TYPE_DEVICE, whose instances are VirtIODevice and whose class
 * structure is VirtioDeviceClass.
 */
static const TypeInfo virtio_device_info = {
    /* Identity and position in the type hierarchy. */
    .name = TYPE_VIRTIO_DEVICE,
    .parent = TYPE_DEVICE,
    .abstract = true,

    /* Instance and class layout. */
    .instance_size = sizeof(VirtIODevice),
    .class_size = sizeof(VirtioDeviceClass),
    .class_init = virtio_device_class_init,
};

/* Register the abstract virtio device type described by virtio_device_info. */
static void virtio_register_types(void)
{
    type_register_static(&virtio_device_info);
}

/*
 * Hand the registration function to type_init (presumably so that it is
 * run during type-system start-up -- see the macro definition to confirm).
 */
type_init(virtio_register_types)