Commit 8e9a8681 authored by Anthony Liguori

Merge remote-tracking branch 'mst/tags/for_anthony' into staging

pci,virtio

This further optimizes MSIX handling in virtio-pci.
Also included are a pci cleanup by Paolo and a pci device
assignment fix by Alex.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>

* mst/tags/for_anthony:
  pci-assign: Enable MSIX on device to match guest
  pci: use constants for devices under the 1B36 device ID, document them
  ivshmem: use symbolic constant for PCI ID, add to pci-ids.txt
  virtio-9p: use symbolic constant, add to pci-ids.txt
  reorganize pci-ids.txt
  docs: move pci-ids.txt to docs/specs/
  vhost: backend masking support
  vhost: set started flag while start is in progress
  virtio-net: set/clear vhost_started in reverse order
  virtio: backend virtqueue notifier masking
  virtio-pci: cache msix messages
  kvm: add stub for update msi route
  msix: add api to access msix message
  virtio: don't waste irqfds on control vqs
PCI IDs for qemu
================
Red Hat, Inc. donates a part of its device ID range to qemu, to be used for
virtual devices. The vendor IDs are 1af4 (formerly Qumranet ID) and 1b36.
Contact Gerd Hoffmann <kraxel@redhat.com> to get a device ID assigned
for your devices.
1af4 vendor ID
--------------
The 1000 -> 10ff device ID range is used as follows for virtio-pci devices.
Note that this allocation is separate from the virtio device IDs, which are
maintained as part of the virtio specification.
1af4:1000 network device
1af4:1001 block device
1af4:1002 balloon device
1af4:1003 console device
1af4:1004 SCSI host bus adapter device
1af4:1005 entropy generator device
1af4:1009 9p filesystem device
1af4:10f0  Available for experimental usage without registration.  Must get
   to      official ID when the code leaves the test lab (i.e. when seeking
1af4:10ff  upstream merge or shipping a distro/product) to avoid conflicts.
1af4:1100 Used as PCI Subsystem ID for existing hardware devices emulated
by qemu.
1af4:1110 ivshmem device (shared memory, docs/specs/ivshmem_device_spec.txt)
All other device IDs are reserved.
1b36 vendor ID
--------------
The 0000 -> 00ff device ID range is used as follows for QEMU-specific
PCI devices (other than virtio):
1b36:0001 PCI-PCI bridge
1b36:0002 PCI serial port (16550A) adapter (docs/specs/pci-serial.txt)
1b36:0003 PCI Dual-port 16550A adapter (docs/specs/pci-serial.txt)
1b36:0004 PCI Quad-port 16550A adapter (docs/specs/pci-serial.txt)
All these devices are documented in docs/specs.
The 0100 device ID is used for the QXL video card device.
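As a hypothetical sketch (not part of this series): a device still under
development would claim an ID from the experimental window above in its
class_init, then trade it for an officially assigned ID before merging.
Here k is a PCIDeviceClass, as in the hunks below:

    /* Hypothetical experimental device: anything in 1af4:10f0..10ff may
     * be used without registration, but must not go upstream or ship
     * with this ID. */
    k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;  /* 0x1af4 */
    k->device_id = 0x10f0;                         /* experimental window */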
......@@ -170,7 +170,7 @@ static void virtio_9p_class_init(ObjectClass *klass, void *data)
k->init = virtio_9p_init_pci;
k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
k->device_id = 0x1009;
k->device_id = PCI_DEVICE_ID_VIRTIO_9P;
k->revision = VIRTIO_PCI_ABI_VERSION;
k->class_id = 0x2;
dc->props = virtio_9p_properties;
......
......@@ -29,6 +29,9 @@
#include <sys/mman.h>
#include <sys/types.h>
#define PCI_VENDOR_ID_IVSHMEM PCI_VENDOR_ID_REDHAT_QUMRANET
#define PCI_DEVICE_ID_IVSHMEM 0x1110
#define IVSHMEM_IOEVENTFD 0
#define IVSHMEM_MSI 1
......@@ -800,8 +803,8 @@ static void ivshmem_class_init(ObjectClass *klass, void *data)
k->init = pci_ivshmem_init;
k->exit = pci_ivshmem_uninit;
k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET;
k->device_id = 0x1110;
k->vendor_id = PCI_VENDOR_ID_IVSHMEM;
k->device_id = PCI_DEVICE_ID_IVSHMEM;
k->class_id = PCI_CLASS_MEMORY_RAM;
dc->reset = ivshmem_reset;
dc->props = ivshmem_properties;
......
......@@ -1031,6 +1031,19 @@ static bool assigned_dev_msix_masked(MSIXTableEntry *entry)
return (entry->ctrl & cpu_to_le32(0x1)) != 0;
}
/*
* When MSI-X is first enabled the vector table typically has all the
* vectors masked, so we can't use that as the obvious test to figure out
* how many vectors to initially enable. Instead we look at the data field
* because this is what worked for pci-assign for a long time. This makes
* sure the physical MSI-X state tracks the guest's view, which is important
* for some VF/PF and PF/fw communication channels.
*/
static bool assigned_dev_msix_skipped(MSIXTableEntry *entry)
{
return !entry->data;
}
static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
{
AssignedDevice *adev = DO_UPCAST(AssignedDevice, dev, pci_dev);
......@@ -1041,7 +1054,7 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
/* Get the usable entry number for allocating */
for (i = 0; i < adev->msix_max; i++, entry++) {
if (assigned_dev_msix_masked(entry)) {
if (assigned_dev_msix_skipped(entry)) {
continue;
}
entries_nr++;
......@@ -1070,7 +1083,7 @@ static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
for (i = 0; i < adev->msix_max; i++, entry++) {
adev->msi_virq[i] = -1;
if (assigned_dev_msix_masked(entry)) {
if (assigned_dev_msix_skipped(entry)) {
continue;
}
......
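To see why the data-field test works where the mask test cannot, consider a
toy table right after MSI-X enable: every entry is masked, but only the
vectors the guest actually programmed have a nonzero data field. A standalone
sketch under that assumption (all names local to the sketch, not QEMU code):

    #include <stdint.h>
    #include <stdio.h>

    typedef struct {
        uint32_t data;  /* MSI data programmed by the guest */
        uint32_t ctrl;  /* bit 0: vector masked */
    } ToyMSIXEntry;

    static int toy_masked(const ToyMSIXEntry *e)  { return e->ctrl & 1; }
    static int toy_skipped(const ToyMSIXEntry *e) { return !e->data; }

    int main(void)
    {
        /* Guest programmed two vectors but left the whole table masked. */
        ToyMSIXEntry table[4] = {
            { .data = 0x4021, .ctrl = 1 },
            { .data = 0x4022, .ctrl = 1 },
            { .data = 0,      .ctrl = 1 },
            { .data = 0,      .ctrl = 1 },
        };
        int by_mask = 0, by_data = 0;

        for (int i = 0; i < 4; i++) {
            by_mask += !toy_masked(&table[i]);   /* old test: finds 0 */
            by_data += !toy_skipped(&table[i]);  /* new test: finds 2 */
        }
        printf("usable vectors: mask test %d, data test %d\n",
               by_mask, by_data);
        return 0;
    }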
......@@ -27,7 +27,7 @@
#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
#define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8)
static MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
{
uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
MSIMessage msg;
......
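msix_get_message, now exported so virtio-pci can cache and compare messages,
conceptually just reads the address and data fields back out of the 16-byte
MSI-X table entry. A self-contained sketch of that layout (entry offsets per
the PCI spec; all names local to the sketch):

    #include <stdint.h>
    #include <inttypes.h>
    #include <string.h>
    #include <stdio.h>

    typedef struct { uint64_t address; uint32_t data; } ToyMSIMessage;

    static ToyMSIMessage toy_get_message(const uint8_t *table,
                                         unsigned vector)
    {
        const uint8_t *entry = table + vector * 16; /* entry size is 16 */
        uint32_t lo, hi;
        ToyMSIMessage msg;

        memcpy(&lo, entry + 0, 4);        /* lower address */
        memcpy(&hi, entry + 4, 4);        /* upper address */
        memcpy(&msg.data, entry + 8, 4);  /* data */
        msg.address = ((uint64_t)hi << 32) | lo;
        return msg;
    }

    int main(void)
    {
        uint8_t table[2 * 16] = { 0 };
        uint32_t lo = 0xfee01000, data = 0x4021;

        memcpy(table + 16 + 0, &lo, 4);   /* program vector 1 */
        memcpy(table + 16 + 8, &data, 4);

        ToyMSIMessage m = toy_get_message(table, 1);
        printf("addr=0x%" PRIx64 " data=0x%" PRIx32 "\n",
               m.address, m.data);
        return 0;
    }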
......@@ -5,6 +5,7 @@
#include "hw/pci/pci.h"
void msix_set_message(PCIDevice *dev, int vector, MSIMessage msg);
MSIMessage msix_get_message(PCIDevice *dev, unsigned int vector);
int msix_init(PCIDevice *dev, unsigned short nentries,
MemoryRegion *table_bar, uint8_t table_bar_nr,
unsigned table_offset, MemoryRegion *pba_bar,
......
......@@ -77,6 +77,14 @@
#define PCI_DEVICE_ID_VIRTIO_CONSOLE 0x1003
#define PCI_DEVICE_ID_VIRTIO_SCSI 0x1004
#define PCI_DEVICE_ID_VIRTIO_RNG 0x1005
#define PCI_DEVICE_ID_VIRTIO_9P 0x1009
#define PCI_VENDOR_ID_REDHAT 0x1b36
#define PCI_DEVICE_ID_REDHAT_BRIDGE 0x0001
#define PCI_DEVICE_ID_REDHAT_SERIAL 0x0002
#define PCI_DEVICE_ID_REDHAT_SERIAL2 0x0003
#define PCI_DEVICE_ID_REDHAT_SERIAL4 0x0004
#define PCI_DEVICE_ID_REDHAT_QXL 0x0100
#define FMT_PCIBUS PRIx64
......
......@@ -27,10 +27,6 @@
#include "exec/memory.h"
#include "pci/pci_bus.h"
#define REDHAT_PCI_VENDOR_ID 0x1b36
#define PCI_BRIDGE_DEV_VENDOR_ID REDHAT_PCI_VENDOR_ID
#define PCI_BRIDGE_DEV_DEVICE_ID 0x1
struct PCIBridgeDev {
PCIBridge bridge;
MemoryRegion bar;
......@@ -146,8 +142,8 @@ static void pci_bridge_dev_class_init(ObjectClass *klass, void *data)
k->init = pci_bridge_dev_initfn;
k->exit = pci_bridge_dev_exitfn;
k->config_write = pci_bridge_dev_write_config;
k->vendor_id = PCI_BRIDGE_DEV_VENDOR_ID;
k->device_id = PCI_BRIDGE_DEV_DEVICE_ID;
k->vendor_id = PCI_VENDOR_ID_REDHAT;
k->device_id = PCI_DEVICE_ID_REDHAT_BRIDGE;
k->class_id = PCI_CLASS_BRIDGE_PCI;
k->is_bridge = 1;
dc->desc = "Standard PCI Bridge";
......
......@@ -185,8 +185,8 @@ static void serial_pci_class_initfn(ObjectClass *klass, void *data)
PCIDeviceClass *pc = PCI_DEVICE_CLASS(klass);
pc->init = serial_pci_init;
pc->exit = serial_pci_exit;
pc->vendor_id = 0x1b36; /* Red Hat */
pc->device_id = 0x0002;
pc->vendor_id = PCI_VENDOR_ID_REDHAT;
pc->device_id = PCI_DEVICE_ID_REDHAT_SERIAL;
pc->revision = 1;
pc->class_id = PCI_CLASS_COMMUNICATION_SERIAL;
dc->vmsd = &vmstate_pci_serial;
......@@ -199,8 +199,8 @@ static void multi_2x_serial_pci_class_initfn(ObjectClass *klass, void *data)
PCIDeviceClass *pc = PCI_DEVICE_CLASS(klass);
pc->init = multi_serial_pci_init;
pc->exit = multi_serial_pci_exit;
pc->vendor_id = 0x1b36; /* Red Hat */
pc->device_id = 0x0003;
pc->vendor_id = PCI_VENDOR_ID_REDHAT;
pc->device_id = PCI_DEVICE_ID_REDHAT_SERIAL2;
pc->revision = 1;
pc->class_id = PCI_CLASS_COMMUNICATION_SERIAL;
dc->vmsd = &vmstate_pci_multi_serial;
......@@ -213,8 +213,8 @@ static void multi_4x_serial_pci_class_initfn(ObjectClass *klass, void *data)
PCIDeviceClass *pc = PCI_DEVICE_CLASS(klass);
pc->init = multi_serial_pci_init;
pc->exit = multi_serial_pci_exit;
pc->vendor_id = 0x1b36; /* Red Hat */
pc->device_id = 0x0004;
pc->vendor_id = PCI_VENDOR_ID_REDHAT;
pc->device_id = PCI_DEVICE_ID_REDHAT_SERIAL4;
pc->revision = 1;
pc->class_id = PCI_CLASS_COMMUNICATION_SERIAL;
dc->vmsd = &vmstate_pci_multi_serial;
......
......@@ -612,7 +612,7 @@ static void vhost_log_stop(MemoryListener *listener,
/* FIXME: implement */
}
static int vhost_virtqueue_init(struct vhost_dev *dev,
static int vhost_virtqueue_start(struct vhost_dev *dev,
struct VirtIODevice *vdev,
struct vhost_virtqueue *vq,
unsigned idx)
......@@ -681,16 +681,11 @@ static int vhost_virtqueue_init(struct vhost_dev *dev,
goto fail_kick;
}
file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq));
r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file);
if (r) {
r = -errno;
goto fail_call;
}
/* Clear and discard previous events if any. */
event_notifier_test_and_clear(&vq->masked_notifier);
return 0;
fail_call:
fail_kick:
fail_alloc:
cpu_physical_memory_unmap(vq->ring, virtio_queue_get_ring_size(vdev, idx),
......@@ -708,7 +703,7 @@ fail_alloc_desc:
return r;
}
static void vhost_virtqueue_cleanup(struct vhost_dev *dev,
static void vhost_virtqueue_stop(struct vhost_dev *dev,
struct VirtIODevice *vdev,
struct vhost_virtqueue *vq,
unsigned idx)
......@@ -746,11 +741,39 @@ static void vhost_eventfd_del(MemoryListener *listener,
{
}
static int vhost_virtqueue_init(struct vhost_dev *dev,
struct vhost_virtqueue *vq, int n)
{
struct vhost_vring_file file = {
.index = n,
};
int r = event_notifier_init(&vq->masked_notifier, 0);
if (r < 0) {
return r;
}
file.fd = event_notifier_get_fd(&vq->masked_notifier);
r = ioctl(dev->control, VHOST_SET_VRING_CALL, &file);
if (r) {
r = -errno;
goto fail_call;
}
return 0;
fail_call:
event_notifier_cleanup(&vq->masked_notifier);
return r;
}
static void vhost_virtqueue_cleanup(struct vhost_virtqueue *vq)
{
event_notifier_cleanup(&vq->masked_notifier);
}
int vhost_dev_init(struct vhost_dev *hdev, int devfd, const char *devpath,
bool force)
{
uint64_t features;
int r;
int i, r;
if (devfd >= 0) {
hdev->control = devfd;
} else {
......@@ -768,6 +791,13 @@ int vhost_dev_init(struct vhost_dev *hdev, int devfd, const char *devpath,
if (r < 0) {
goto fail;
}
for (i = 0; i < hdev->nvqs; ++i) {
r = vhost_virtqueue_init(hdev, hdev->vqs + i, i);
if (r < 0) {
goto fail_vq;
}
}
hdev->features = features;
hdev->memory_listener = (MemoryListener) {
......@@ -795,6 +825,10 @@ int vhost_dev_init(struct vhost_dev *hdev, int devfd, const char *devpath,
memory_listener_register(&hdev->memory_listener, &address_space_memory);
hdev->force = force;
return 0;
fail_vq:
while (--i >= 0) {
vhost_virtqueue_cleanup(hdev->vqs + i);
}
fail:
r = -errno;
close(hdev->control);
......@@ -803,6 +837,10 @@ fail:
void vhost_dev_cleanup(struct vhost_dev *hdev)
{
int i;
for (i = 0; i < hdev->nvqs; ++i) {
vhost_virtqueue_cleanup(hdev->vqs + i);
}
memory_listener_unregister(&hdev->memory_listener);
g_free(hdev->mem);
g_free(hdev->mem_sections);
......@@ -869,17 +907,53 @@ void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev)
}
}
/* Test and clear event pending status.
* Should be called after unmask to avoid losing events.
*/
bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n)
{
struct vhost_virtqueue *vq = hdev->vqs + n;
assert(hdev->started);
return event_notifier_test_and_clear(&vq->masked_notifier);
}
/* Mask/unmask events from this vq. */
void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
bool mask)
{
struct VirtQueue *vvq = virtio_get_queue(vdev, n);
int r;
assert(hdev->started);
struct vhost_vring_file file = {
.index = n,
};
if (mask) {
file.fd = event_notifier_get_fd(&hdev->vqs[n].masked_notifier);
} else {
file.fd = event_notifier_get_fd(virtio_queue_get_guest_notifier(vvq));
}
r = ioctl(hdev->control, VHOST_SET_VRING_CALL, &file);
assert(r >= 0);
}
/* Host notifiers must be enabled at this point. */
int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
{
int i, r;
hdev->started = true;
if (!vdev->binding->set_guest_notifiers) {
fprintf(stderr, "binding does not support guest notifiers\n");
r = -ENOSYS;
goto fail;
}
r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, true);
r = vdev->binding->set_guest_notifiers(vdev->binding_opaque,
hdev->nvqs,
true);
if (r < 0) {
fprintf(stderr, "Error binding guest notifier: %d\n", -r);
goto fail_notifiers;
......@@ -895,7 +969,7 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
goto fail_mem;
}
for (i = 0; i < hdev->nvqs; ++i) {
r = vhost_virtqueue_init(hdev,
r = vhost_virtqueue_start(hdev,
vdev,
hdev->vqs + i,
i);
......@@ -916,22 +990,22 @@ int vhost_dev_start(struct vhost_dev *hdev, VirtIODevice *vdev)
}
}
hdev->started = true;
return 0;
fail_log:
fail_vq:
while (--i >= 0) {
vhost_virtqueue_cleanup(hdev,
vhost_virtqueue_stop(hdev,
vdev,
hdev->vqs + i,
i);
}
fail_mem:
fail_features:
vdev->binding->set_guest_notifiers(vdev->binding_opaque, false);
vdev->binding->set_guest_notifiers(vdev->binding_opaque, hdev->nvqs, false);
fail_notifiers:
fail:
hdev->started = false;
return r;
}
......@@ -941,7 +1015,7 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
int i, r;
for (i = 0; i < hdev->nvqs; ++i) {
vhost_virtqueue_cleanup(hdev,
vhost_virtqueue_stop(hdev,
vdev,
hdev->vqs + i,
i);
......@@ -950,7 +1024,9 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev)
vhost_sync_dirty_bitmap(hdev, &hdev->mem_sections[i],
0, (hwaddr)~0x0ull);
}
r = vdev->binding->set_guest_notifiers(vdev->binding_opaque, false);
r = vdev->binding->set_guest_notifiers(vdev->binding_opaque,
hdev->nvqs,
false);
if (r < 0) {
fprintf(stderr, "vhost guest notifier cleanup failed: %d\n", r);
fflush(stderr);
......
......@@ -18,6 +18,7 @@ struct vhost_virtqueue {
void *ring;
unsigned long long ring_phys;
unsigned ring_size;
EventNotifier masked_notifier;
};
typedef unsigned long vhost_log_chunk_t;
......@@ -53,4 +54,13 @@ void vhost_dev_stop(struct vhost_dev *hdev, VirtIODevice *vdev);
int vhost_dev_enable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev);
void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev);
/* Test and clear masked event pending status.
* Should be called after unmask to avoid losing events.
*/
bool vhost_virtqueue_pending(struct vhost_dev *hdev, int n);
/* Mask/unmask events from this vq.
*/
void vhost_virtqueue_mask(struct vhost_dev *hdev, VirtIODevice *vdev, int n,
bool mask);
#endif
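The ordering these comments require (unmask first, then test-and-clear) can
be modeled with plain eventfds standing in for QEMU's EventNotifier: an event
that fired while masked is drained from the masked notifier and re-injected
at the guest notifier, so nothing is lost. A Linux-only sketch, no QEMU types:

    #include <stdint.h>
    #include <stdio.h>
    #include <unistd.h>
    #include <sys/eventfd.h>

    /* Like event_notifier_test_and_clear: true if an event was pending. */
    static int test_and_clear(int efd)
    {
        uint64_t cnt;
        return read(efd, &cnt, sizeof(cnt)) == sizeof(cnt);
    }

    int main(void)
    {
        int masked_notifier = eventfd(0, EFD_NONBLOCK);
        int guest_notifier  = eventfd(0, EFD_NONBLOCK);
        uint64_t one = 1;

        /* While masked, the backend's VRING_CALL fd is the masked
         * notifier, so an interrupt fired now lands there. */
        if (write(masked_notifier, &one, sizeof(one)) != sizeof(one)) {
            return 1;
        }

        /* Unmask: repoint VRING_CALL at the guest notifier (elided),
         * THEN drain the masked notifier and forward anything pending. */
        if (test_and_clear(masked_notifier)) {
            if (write(guest_notifier, &one, sizeof(one)) != sizeof(one)) {
                return 1;
            }
        }

        printf("event delivered: %d\n", test_and_clear(guest_notifier));
        close(masked_notifier);
        close(guest_notifier);
        return 0;
    }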
......@@ -109,6 +109,9 @@ struct vhost_net *vhost_net_init(NetClientState *backend, int devfd,
(1 << VHOST_NET_F_VIRTIO_NET_HDR);
net->backend = r;
net->dev.nvqs = 2;
net->dev.vqs = net->vqs;
r = vhost_dev_init(&net->dev, devfd, "/dev/vhost-net", force);
if (r < 0) {
goto fail;
......@@ -143,9 +146,6 @@ int vhost_net_start(struct vhost_net *net,
struct vhost_vring_file file = { };
int r;
net->dev.nvqs = 2;
net->dev.vqs = net->vqs;
r = vhost_dev_enable_notifiers(&net->dev, dev);
if (r < 0) {
goto fail_notifiers;
......@@ -200,6 +200,17 @@ void vhost_net_cleanup(struct vhost_net *net)
vhost_dev_cleanup(&net->dev);
g_free(net);
}
bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
{
return vhost_virtqueue_pending(&net->dev, idx);
}
void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
int idx, bool mask)
{
vhost_virtqueue_mask(&net->dev, dev, idx, mask);
}
#else
struct vhost_net *vhost_net_init(NetClientState *backend, int devfd,
bool force)
......@@ -234,4 +245,14 @@ unsigned vhost_net_get_features(struct vhost_net *net, unsigned features)
void vhost_net_ack_features(struct vhost_net *net, unsigned features)
{
}
bool vhost_net_virtqueue_pending(VHostNetState *net, int idx)
{
return false;
}
void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
int idx, bool mask)
{
}
#endif
......@@ -17,4 +17,7 @@ void vhost_net_cleanup(VHostNetState *net);
unsigned vhost_net_get_features(VHostNetState *net, unsigned features);
void vhost_net_ack_features(VHostNetState *net, unsigned features);
bool vhost_net_virtqueue_pending(VHostNetState *net, int n);
void vhost_net_virtqueue_mask(VHostNetState *net, VirtIODevice *dev,
int idx, bool mask);
#endif
......@@ -126,12 +126,12 @@ static void virtio_net_vhost_status(VirtIONet *n, uint8_t status)
if (!vhost_net_query(tap_get_vhost_net(n->nic->nc.peer), &n->vdev)) {
return;
}
n->vhost_started = 1;
r = vhost_net_start(tap_get_vhost_net(n->nic->nc.peer), &n->vdev);
if (r < 0) {
error_report("unable to start vhost net: %d: "
"falling back on userspace virtio", -r);
} else {
n->vhost_started = 1;
n->vhost_started = 0;
}
} else {
vhost_net_stop(tap_get_vhost_net(n->nic->nc.peer), &n->vdev);
......@@ -1010,6 +1010,22 @@ static NetClientInfo net_virtio_info = {
.link_status_changed = virtio_net_set_link_status,
};
static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx)
{
VirtIONet *n = to_virtio_net(vdev);
assert(n->vhost_started);
return vhost_net_virtqueue_pending(tap_get_vhost_net(n->nic->nc.peer), idx);
}
static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx,
bool mask)
{
VirtIONet *n = to_virtio_net(vdev);
assert(n->vhost_started);
vhost_net_virtqueue_mask(tap_get_vhost_net(n->nic->nc.peer),
vdev, idx, mask);
}
VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
virtio_net_conf *net)
{
......@@ -1026,6 +1042,8 @@ VirtIODevice *virtio_net_init(DeviceState *dev, NICConf *conf,
n->vdev.bad_features = virtio_net_bad_features;
n->vdev.reset = virtio_net_reset;
n->vdev.set_status = virtio_net_set_status;
n->vdev.guest_notifier_mask = virtio_net_guest_notifier_mask;
n->vdev.guest_notifier_pending = virtio_net_guest_notifier_pending;
n->rx_vq = virtio_add_queue(&n->vdev, 256, virtio_net_handle_rx);
if (net->tx && strcmp(net->tx, "timer") && strcmp(net->tx, "bh")) {
......
......@@ -487,8 +487,6 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
unsigned int vector,
MSIMessage msg)
{
VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
EventNotifier *n = virtio_queue_get_guest_notifier(vq);
VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
int ret;
......@@ -500,20 +498,33 @@ static int kvm_virtio_pci_vq_vector_use(VirtIOPCIProxy *proxy,
irqfd->virq = ret;
}
irqfd->users++;
ret = kvm_irqchip_add_irqfd_notifier(kvm_state, n, irqfd->virq);
if (ret < 0) {
if (--irqfd->users == 0) {
kvm_irqchip_release_virq(kvm_state, irqfd->virq);
}
return ret;
}
return 0;
}
static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy,
unsigned int queue_no,
unsigned int vector)
{
VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
if (--irqfd->users == 0) {
kvm_irqchip_release_virq(kvm_state, irqfd->virq);
}
}
static int kvm_virtio_pci_irqfd_use(VirtIOPCIProxy *proxy,
unsigned int queue_no,
unsigned int vector)
{
VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
EventNotifier *n = virtio_queue_get_guest_notifier(vq);
int ret;
ret = kvm_irqchip_add_irqfd_notifier(kvm_state, n, irqfd->virq);
return ret;
}
static void kvm_virtio_pci_irqfd_release(VirtIOPCIProxy *proxy,
unsigned int queue_no,
unsigned int vector)
{
VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
EventNotifier *n = virtio_queue_get_guest_notifier(vq);
......@@ -522,27 +533,143 @@ static void kvm_virtio_pci_vq_vector_release(VirtIOPCIProxy *proxy,
ret = kvm_irqchip_remove_irqfd_notifier(kvm_state, n, irqfd->virq);
assert(ret == 0);
}
if (--irqfd->users == 0) {
kvm_irqchip_release_virq(kvm_state, irqfd->virq);
static int kvm_virtio_pci_vector_use(VirtIOPCIProxy *proxy, int nvqs)
{
PCIDevice *dev = &proxy->pci_dev;
VirtIODevice *vdev = proxy->vdev;
unsigned int vector;
int ret, queue_no;
MSIMessage msg;
for (queue_no = 0; queue_no < nvqs; queue_no++) {
if (!virtio_queue_get_num(vdev, queue_no)) {
break;
}
vector = virtio_queue_vector(vdev, queue_no);
if (vector >= msix_nr_vectors_allocated(dev)) {
continue;
}
msg = msix_get_message(dev, vector);
ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector, msg);
if (ret < 0) {
goto undo;
}
/* If guest supports masking, set up irqfd now.
* Otherwise, delay until unmasked in the frontend.
*/
if (proxy->vdev->guest_notifier_mask) {
ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector);
if (ret < 0) {
kvm_virtio_pci_vq_vector_release(proxy, vector);
goto undo;
}
}
}
return 0;
undo:
while (--queue_no >= 0) {
vector = virtio_queue_vector(vdev, queue_no);
if (vector >= msix_nr_vectors_allocated(dev)) {
continue;
}
if (proxy->vdev->guest_notifier_mask) {
kvm_virtio_pci_irqfd_release(proxy, queue_no, vector);
}
kvm_virtio_pci_vq_vector_release(proxy, vector);
}
return ret;
}
static void kvm_virtio_pci_vector_release(VirtIOPCIProxy *proxy, int nvqs)
{
PCIDevice *dev = &proxy->pci_dev;
VirtIODevice *vdev = proxy->vdev;
unsigned int vector;
int queue_no;
for (queue_no = 0; queue_no < nvqs; queue_no++) {
if (!virtio_queue_get_num(vdev, queue_no)) {
break;
}
vector = virtio_queue_vector(vdev, queue_no);
if (vector >= msix_nr_vectors_allocated(dev)) {
continue;
}
/* If guest supports masking, clean up irqfd now.
* Otherwise, it was cleaned when masked in the frontend.
*/
if (proxy->vdev->guest_notifier_mask) {
kvm_virtio_pci_irqfd_release(proxy, queue_no, vector);
}
kvm_virtio_pci_vq_vector_release(proxy, vector);
}
}
static int kvm_virtio_pci_vq_vector_unmask(VirtIOPCIProxy *proxy,
unsigned int queue_no,
unsigned int vector,
MSIMessage msg)
{
VirtQueue *vq = virtio_get_queue(proxy->vdev, queue_no);
EventNotifier *n = virtio_queue_get_guest_notifier(vq);
VirtIOIRQFD *irqfd = &proxy->vector_irqfd[vector];
int ret = 0;
if (irqfd->msg.data != msg.data || irqfd->msg.address != msg.address) {
ret = kvm_irqchip_update_msi_route(kvm_state, irqfd->virq, msg);
if (ret < 0) {
return ret;
}
}
/* If guest supports masking, irqfd is already setup, unmask it.
* Otherwise, set it up now.
*/
if (proxy->vdev->guest_notifier_mask) {
proxy->vdev->guest_notifier_mask(proxy->vdev, queue_no, false);
/* Test after unmasking to avoid losing events. */
if (proxy->vdev->guest_notifier_pending &&
proxy->vdev->guest_notifier_pending(proxy->vdev, queue_no)) {
event_notifier_set(n);
}
} else {
ret = kvm_virtio_pci_irqfd_use(proxy, queue_no, vector);
}
return ret;
}
static int kvm_virtio_pci_vector_use(PCIDevice *dev, unsigned vector,
static void kvm_virtio_pci_vq_vector_mask(VirtIOPCIProxy *proxy,
unsigned int queue_no,
unsigned int vector)
{
/* If guest supports masking, keep irqfd but mask it.
* Otherwise, clean it up now.
*/
if (proxy->vdev->guest_notifier_mask) {
proxy->vdev->guest_notifier_mask(proxy->vdev, queue_no, true);
} else {
kvm_virtio_pci_irqfd_release(proxy, queue_no, vector);
}
}
static int kvm_virtio_pci_vector_unmask(PCIDevice *dev, unsigned vector,
MSIMessage msg)
{
VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
VirtIODevice *vdev = proxy->vdev;
int ret, queue_no;
for (queue_no = 0; queue_no < VIRTIO_PCI_QUEUE_MAX; queue_no++) {
for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) {
if (!virtio_queue_get_num(vdev, queue_no)) {
break;
}
if (virtio_queue_vector(vdev, queue_no) != vector) {
continue;
}
ret = kvm_virtio_pci_vq_vector_use(proxy, queue_no, vector, msg);
ret = kvm_virtio_pci_vq_vector_unmask(proxy, queue_no, vector, msg);
if (ret < 0) {
goto undo;
}
......@@ -554,25 +681,25 @@ undo:
if (virtio_queue_vector(vdev, queue_no) != vector) {
continue;
}
kvm_virtio_pci_vq_vector_release(proxy, queue_no, vector);
kvm_virtio_pci_vq_vector_mask(proxy, queue_no, vector);
}
return ret;
}
static void kvm_virtio_pci_vector_release(PCIDevice *dev, unsigned vector)
static void kvm_virtio_pci_vector_mask(PCIDevice *dev, unsigned vector)
{
VirtIOPCIProxy *proxy = container_of(dev, VirtIOPCIProxy, pci_dev);
VirtIODevice *vdev = proxy->vdev;
int queue_no;
for (queue_no = 0; queue_no < VIRTIO_PCI_QUEUE_MAX; queue_no++) {
for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) {
if (!virtio_queue_get_num(vdev, queue_no)) {
break;
}
if (virtio_queue_vector(vdev, queue_no) != vector) {
continue;
}
kvm_virtio_pci_vq_vector_release(proxy, queue_no, vector);
kvm_virtio_pci_vq_vector_mask(proxy, queue_no, vector);
}
}
......@@ -587,7 +714,7 @@ static void kvm_virtio_pci_vector_poll(PCIDevice *dev,
EventNotifier *notifier;
VirtQueue *vq;
for (queue_no = 0; queue_no < VIRTIO_PCI_QUEUE_MAX; queue_no++) {
for (queue_no = 0; queue_no < proxy->nvqs_with_notifiers; queue_no++) {
if (!virtio_queue_get_num(vdev, queue_no)) {
break;
}
......@@ -598,7 +725,11 @@ static void kvm_virtio_pci_vector_poll(PCIDevice *dev,
}
vq = virtio_get_queue(vdev, queue_no);
notifier = virtio_queue_get_guest_notifier(vq);
if (event_notifier_test_and_clear(notifier)) {
if (vdev->guest_notifier_pending) {
if (vdev->guest_notifier_pending(vdev, queue_no)) {
msix_set_pending(dev, vector);
}
} else if (event_notifier_test_and_clear(notifier)) {
msix_set_pending(dev, vector);
}
}
......@@ -631,7 +762,7 @@ static bool virtio_pci_query_guest_notifiers(DeviceState *d)
return msix_enabled(&proxy->pci_dev);
}
static int virtio_pci_set_guest_notifiers(DeviceState *d, bool assign)
static int virtio_pci_set_guest_notifiers(DeviceState *d, int nvqs, bool assign)
{
VirtIOPCIProxy *proxy = to_virtio_pci_proxy(d);
VirtIODevice *vdev = proxy->vdev;
......@@ -639,14 +770,24 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, bool assign)
bool with_irqfd = msix_enabled(&proxy->pci_dev) &&
kvm_msi_via_irqfd_enabled();
nvqs = MIN(nvqs, VIRTIO_PCI_QUEUE_MAX);
/* When deassigning, pass a consistent nvqs value
* to avoid leaking notifiers.
*/
assert(assign || nvqs == proxy->nvqs_with_notifiers);
proxy->nvqs_with_notifiers = nvqs;
/* Must unset vector notifier while guest notifier is still assigned */
if (proxy->vector_irqfd && !assign) {
msix_unset_vector_notifiers(&proxy->pci_dev);
kvm_virtio_pci_vector_release(proxy, nvqs);
g_free(proxy->vector_irqfd);
proxy->vector_irqfd = NULL;
}
for (n = 0; n < VIRTIO_PCI_QUEUE_MAX; n++) {
for (n = 0; n < nvqs; n++) {
if (!virtio_queue_get_num(vdev, n)) {
break;
}
......@@ -663,17 +804,25 @@ static int virtio_pci_set_guest_notifiers(DeviceState *d, bool assign)
proxy->vector_irqfd =
g_malloc0(sizeof(*proxy->vector_irqfd) *
msix_nr_vectors_allocated(&proxy->pci_dev));
r = kvm_virtio_pci_vector_use(proxy, nvqs);
if (r < 0) {
goto assign_error;
}
r = msix_set_vector_notifiers(&proxy->pci_dev,
kvm_virtio_pci_vector_use,
kvm_virtio_pci_vector_release,
kvm_virtio_pci_vector_unmask,
kvm_virtio_pci_vector_mask,
kvm_virtio_pci_vector_poll);
if (r < 0) {
goto assign_error;
goto notifiers_error;
}
}
return 0;
notifiers_error:
assert(assign);
kvm_virtio_pci_vector_release(proxy, nvqs);
assign_error:
/* We get here on assignment failure. Recover by undoing for VQs 0 .. n. */
assert(assign);
......
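One detail worth calling out from the hunks above: queues that share an MSI
vector share one KVM route, via the users refcount in
kvm_virtio_pci_vq_vector_use/release. A toy model of that lifecycle (names
local to the sketch; next_virq stands in for kvm_irqchip_add_msi_route):

    #include <stdio.h>

    typedef struct { int virq; unsigned users; } ToyIRQFD;

    static int next_virq = 100;

    static int vector_use(ToyIRQFD *f)
    {
        if (f->users == 0) {
            f->virq = next_virq++;  /* allocate route on first user */
        }
        f->users++;
        return f->virq;
    }

    static void vector_release(ToyIRQFD *f)
    {
        if (--f->users == 0) {
            /* last user: kvm_irqchip_release_virq in the real code */
            printf("virq %d released\n", f->virq);
        }
    }

    int main(void)
    {
        ToyIRQFD fd = { 0, 0 };

        vector_use(&fd);      /* queue 0 -> vector */
        vector_use(&fd);      /* queue 1 -> same vector, same virq */
        printf("virq %d shared by %u users\n", fd.virq, fd.users);
        vector_release(&fd);
        vector_release(&fd);  /* frees the route */
        return 0;
    }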
......@@ -27,6 +27,7 @@
#define VIRTIO_PCI_FLAG_USE_IOEVENTFD (1 << VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT)
typedef struct {
MSIMessage msg;
int virq;
unsigned int users;
} VirtIOIRQFD;
......@@ -51,6 +52,7 @@ typedef struct {
bool ioeventfd_disabled;
bool ioeventfd_started;
VirtIOIRQFD *vector_irqfd;
int nvqs_with_notifiers;
} VirtIOPCIProxy;
void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev);
......
......@@ -99,7 +99,7 @@ typedef struct {
int (*load_done)(DeviceState *d, QEMUFile *f);
unsigned (*get_features)(DeviceState *d);
bool (*query_guest_notifiers)(DeviceState *d);
int (*set_guest_notifiers)(DeviceState *d, bool assigned);
int (*set_guest_notifiers)(DeviceState *d, int nvqs, bool assigned);
int (*set_host_notifier)(DeviceState *d, int n, bool assigned);
void (*vmstate_change)(DeviceState *d, bool running);
} VirtIOBindings;
......@@ -126,6 +126,19 @@ struct VirtIODevice
void (*set_config)(VirtIODevice *vdev, const uint8_t *config);
void (*reset)(VirtIODevice *vdev);
void (*set_status)(VirtIODevice *vdev, uint8_t val);
/* Test and clear event pending status.
* Should be called after unmask to avoid losing events.
* If backend does not support masking,
* must check in frontend instead.
*/
bool (*guest_notifier_pending)(VirtIODevice *vdev, int n);
/* Mask/unmask events from this vq. Any events reported
* while masked will become pending.
* If backend does not support masking,
* must mask in frontend instead.
*/
void (*guest_notifier_mask)(VirtIODevice *vdev, int n, bool mask);
VirtQueue *vq;
const VirtIOBindings *binding;
DeviceState *binding_opaque;
......
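A minimal model of the contract these callbacks document, with plain bools in
place of notifiers (purely illustrative, no QEMU types): events reported
while masked become pending rather than being delivered, and
guest_notifier_pending is a test-and-clear queried right after unmasking.

    #include <stdbool.h>
    #include <stdio.h>

    #define NVQS 2

    static bool masked[NVQS], pending[NVQS];

    /* Backend reports an event for vq n. */
    static void backend_event(int n)
    {
        if (masked[n]) {
            pending[n] = true;              /* recorded, not lost */
        } else {
            printf("vq %d: interrupt guest\n", n);
        }
    }

    static void guest_notifier_mask(int n, bool mask)
    {
        masked[n] = mask;
    }

    /* Test and clear, as the comment above specifies. */
    static bool guest_notifier_pending(int n)
    {
        bool p = pending[n];
        pending[n] = false;
        return p;
    }

    int main(void)
    {
        guest_notifier_mask(0, true);
        backend_event(0);                 /* lands in pending[] */
        guest_notifier_mask(0, false);    /* unmask first... */
        if (guest_notifier_pending(0)) {  /* ...then test-and-clear */
            printf("vq 0: deliver deferred interrupt\n");
        }
        return 0;
    }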
......@@ -131,6 +131,11 @@ void kvm_irqchip_release_virq(KVMState *s, int virq)
{
}
int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg)
{
return -ENOSYS;
}
int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n, int virq)
{
return -ENOSYS;
......
PCI IDs for qemu
================
Red Hat, Inc. donates a part of its device ID range to qemu, to be used for
virtual devices. The vendor ID is 1af4 (formerly Qumranet ID).
The 1000 -> 10ff device ID range is used for VirtIO devices.
The 1100 device ID is used as PCI Subsystem ID for existing hardware
devices emulated by qemu.
All other device IDs are reserved.
VirtIO Device IDs
-----------------
1af4:1000 network device
1af4:1001 block device
1af4:1002 balloon device
1af4:1003 console device
1af4:1004  Reserved.
   to      Contact Gerd Hoffmann <kraxel@redhat.com> to get a
1af4:10ef  device ID assigned for your new virtio device.

1af4:10f0  Available for experimental usage without registration.  Must get
   to      official ID when the code leaves the test lab (i.e. when seeking
1af4:10ff  upstream merge or shipping a distro/product) to avoid conflicts.