提交 53a259da 作者: Peter Maydell

Merge remote-tracking branch 'remotes/awilliam/tags/vfio-pci-for-qemu-20140630.0' into staging

VFIO patches: MSI-X masking performance fix, Endian fixes, fix runstate on device error

# gpg: Signature made Mon 30 Jun 2014 18:13:40 BST using RSA key ID 3BB08B22
# gpg: Can't check signature: public key not found

* remotes/awilliam/tags/vfio-pci-for-qemu-20140630.0:
  vfio: use correct runstate
  vfio: Make BARs native endian
  vfio-pci: Fix MSI-X masking performance
  vfio-pci: Fix MSI/X debug code
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
...@@ -121,6 +121,7 @@ typedef struct VFIOINTx { ...@@ -121,6 +121,7 @@ typedef struct VFIOINTx {
typedef struct VFIOMSIVector { typedef struct VFIOMSIVector {
EventNotifier interrupt; /* eventfd triggered on interrupt */ EventNotifier interrupt; /* eventfd triggered on interrupt */
EventNotifier kvm_interrupt; /* eventfd triggered for KVM irqfd bypass */
struct VFIODevice *vdev; /* back pointer to device */ struct VFIODevice *vdev; /* back pointer to device */
MSIMessage msg; /* cache the MSI message so we know when it changes */ MSIMessage msg; /* cache the MSI message so we know when it changes */
int virq; /* KVM irqchip route for QEMU bypass */ int virq; /* KVM irqchip route for QEMU bypass */
...@@ -642,9 +643,9 @@ static void vfio_msi_interrupt(void *opaque) ...@@ -642,9 +643,9 @@ static void vfio_msi_interrupt(void *opaque)
MSIMessage msg; MSIMessage msg;
if (vdev->interrupt == VFIO_INT_MSIX) { if (vdev->interrupt == VFIO_INT_MSIX) {
msg = msi_get_message(&vdev->pdev, nr);
} else if (vdev->interrupt == VFIO_INT_MSI) {
msg = msix_get_message(&vdev->pdev, nr); msg = msix_get_message(&vdev->pdev, nr);
} else if (vdev->interrupt == VFIO_INT_MSI) {
msg = msi_get_message(&vdev->pdev, nr);
} else { } else {
abort(); abort();
} }
...@@ -682,10 +683,11 @@ static int vfio_enable_vectors(VFIODevice *vdev, bool msix) ...@@ -682,10 +683,11 @@ static int vfio_enable_vectors(VFIODevice *vdev, bool msix)
for (i = 0; i < vdev->nr_vectors; i++) { for (i = 0; i < vdev->nr_vectors; i++) {
if (!vdev->msi_vectors[i].use) { if (!vdev->msi_vectors[i].use) {
fds[i] = -1; fds[i] = -1;
continue; } else if (vdev->msi_vectors[i].virq >= 0) {
fds[i] = event_notifier_get_fd(&vdev->msi_vectors[i].kvm_interrupt);
} else {
fds[i] = event_notifier_get_fd(&vdev->msi_vectors[i].interrupt);
} }
fds[i] = event_notifier_get_fd(&vdev->msi_vectors[i].interrupt);
} }
ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set); ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
...@@ -695,6 +697,52 @@ static int vfio_enable_vectors(VFIODevice *vdev, bool msix) ...@@ -695,6 +697,52 @@ static int vfio_enable_vectors(VFIODevice *vdev, bool msix)
return ret; return ret;
} }
/*
 * Try to set up a KVM irqchip route plus irqfd for one MSI/MSI-X vector,
 * using the vector's dedicated kvm_interrupt eventfd.
 *
 * @vector: vector to configure; on success vector->msg and vector->virq
 *          are filled in, on any failure neither field is written.
 * @msg:    MSI message to route; NULL means "nothing to route" and the
 *          function returns without doing anything.
 * @msix:   true for MSI-X, false for MSI; selects which VFIO_ALLOW_KVM_*
 *          switch gates the bypass.
 *
 * Returns nothing: failure is silent.  NOTE(review): callers appear to
 * preset vector->virq to -1 and detect failure by it staying negative --
 * confirm at each call site.
 */
static void vfio_add_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage *msg,
bool msix)
{
int virq;
/* Bypass disabled for this interrupt type, or no message supplied. */
if ((msix && !VFIO_ALLOW_KVM_MSIX) ||
(!msix && !VFIO_ALLOW_KVM_MSI) || !msg) {
return;
}
/* A non-zero return from event_notifier_init is treated as failure. */
if (event_notifier_init(&vector->kvm_interrupt, 0)) {
return;
}
virq = kvm_irqchip_add_msi_route(kvm_state, *msg);
if (virq < 0) {
/* No route obtained: undo the notifier created above. */
event_notifier_cleanup(&vector->kvm_interrupt);
return;
}
if (kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->kvm_interrupt,
NULL, virq) < 0) {
/* Roll back in reverse order: release the route, then the notifier. */
kvm_irqchip_release_virq(kvm_state, virq);
event_notifier_cleanup(&vector->kvm_interrupt);
return;
}
/* Commit only after every step succeeded. */
vector->msg = *msg;
vector->virq = virq;
}
/*
 * Tear down the KVM irqfd setup of a vector: detach the kvm_interrupt
 * notifier from vector->virq, release that virq, mark the vector as
 * having no route (virq = -1) and clean up the kvm_interrupt notifier.
 *
 * The function itself does not check vector->virq; the call sites visible
 * in this change guard it with "vector->virq >= 0".
 */
static void vfio_remove_kvm_msi_virq(VFIOMSIVector *vector)
{
kvm_irqchip_remove_irqfd_notifier(kvm_state, &vector->kvm_interrupt,
vector->virq);
kvm_irqchip_release_virq(kvm_state, vector->virq);
/* Reset only after the two calls above have consumed the old value. */
vector->virq = -1;
event_notifier_cleanup(&vector->kvm_interrupt);
}
/*
 * Point the vector's existing KVM route (vector->virq) at a new MSI
 * message and cache that message in vector->msg.
 *
 * The update is issued unconditionally: the new message is not compared
 * with the cached one first, and the return value of
 * kvm_irqchip_update_msi_route is not examined.
 */
static void vfio_update_kvm_msi_virq(VFIOMSIVector *vector, MSIMessage msg)
{
kvm_irqchip_update_msi_route(kvm_state, vector->virq, msg);
vector->msg = msg;
}
static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
MSIMessage *msg, IOHandler *handler) MSIMessage *msg, IOHandler *handler)
{ {
...@@ -707,30 +755,32 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, ...@@ -707,30 +755,32 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
vdev->host.function, nr); vdev->host.function, nr);
vector = &vdev->msi_vectors[nr]; vector = &vdev->msi_vectors[nr];
vector->vdev = vdev;
vector->use = true;
msix_vector_use(pdev, nr);
if (event_notifier_init(&vector->interrupt, 0)) { if (!vector->use) {
error_report("vfio: Error: event_notifier_init failed"); vector->vdev = vdev;
vector->virq = -1;
if (event_notifier_init(&vector->interrupt, 0)) {
error_report("vfio: Error: event_notifier_init failed");
}
vector->use = true;
msix_vector_use(pdev, nr);
} }
qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
handler, NULL, vector);
/* /*
* Attempt to enable route through KVM irqchip, * Attempt to enable route through KVM irqchip,
* default to userspace handling if unavailable. * default to userspace handling if unavailable.
*/ */
vector->virq = msg && VFIO_ALLOW_KVM_MSIX ? if (vector->virq >= 0) {
kvm_irqchip_add_msi_route(kvm_state, *msg) : -1; if (!msg) {
if (vector->virq < 0 || vfio_remove_kvm_msi_virq(vector);
kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt, } else {
NULL, vector->virq) < 0) { vfio_update_kvm_msi_virq(vector, *msg);
if (vector->virq >= 0) {
kvm_irqchip_release_virq(kvm_state, vector->virq);
vector->virq = -1;
} }
qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt), } else {
handler, NULL, vector); vfio_add_kvm_msi_virq(vector, msg, true);
} }
/* /*
...@@ -761,7 +811,11 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr, ...@@ -761,7 +811,11 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
irq_set->count = 1; irq_set->count = 1;
pfd = (int32_t *)&irq_set->data; pfd = (int32_t *)&irq_set->data;
*pfd = event_notifier_get_fd(&vector->interrupt); if (vector->virq >= 0) {
*pfd = event_notifier_get_fd(&vector->kvm_interrupt);
} else {
*pfd = event_notifier_get_fd(&vector->interrupt);
}
ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set); ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
g_free(irq_set); g_free(irq_set);
...@@ -783,50 +837,41 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr) ...@@ -783,50 +837,41 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr)
{ {
VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev); VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
VFIOMSIVector *vector = &vdev->msi_vectors[nr]; VFIOMSIVector *vector = &vdev->msi_vectors[nr];
int argsz;
struct vfio_irq_set *irq_set;
int32_t *pfd;
DPRINTF("%s(%04x:%02x:%02x.%x) vector %d released\n", __func__, DPRINTF("%s(%04x:%02x:%02x.%x) vector %d released\n", __func__,
vdev->host.domain, vdev->host.bus, vdev->host.slot, vdev->host.domain, vdev->host.bus, vdev->host.slot,
vdev->host.function, nr); vdev->host.function, nr);
/* /*
* XXX What's the right thing to do here? This turns off the interrupt * There are still old guests that mask and unmask vectors on every
* completely, but do we really just want to switch the interrupt to * interrupt. If we're using QEMU bypass with a KVM irqfd, leave all of
* bouncing through userspace and let msix.c drop it? Not sure. * the KVM setup in place, simply switch VFIO to use the non-bypass
* eventfd. We'll then fire the interrupt through QEMU and the MSI-X
* core will mask the interrupt and set pending bits, allowing it to
* be re-asserted on unmask. Nothing to do if already using QEMU mode.
*/ */
msix_vector_unuse(pdev, nr); if (vector->virq >= 0) {
int argsz;
argsz = sizeof(*irq_set) + sizeof(*pfd); struct vfio_irq_set *irq_set;
int32_t *pfd;
irq_set = g_malloc0(argsz); argsz = sizeof(*irq_set) + sizeof(*pfd);
irq_set->argsz = argsz;
irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
VFIO_IRQ_SET_ACTION_TRIGGER;
irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
irq_set->start = nr;
irq_set->count = 1;
pfd = (int32_t *)&irq_set->data;
*pfd = -1; irq_set = g_malloc0(argsz);
irq_set->argsz = argsz;
irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
VFIO_IRQ_SET_ACTION_TRIGGER;
irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
irq_set->start = nr;
irq_set->count = 1;
pfd = (int32_t *)&irq_set->data;
ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set); *pfd = event_notifier_get_fd(&vector->interrupt);
g_free(irq_set); ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
if (vector->virq < 0) { g_free(irq_set);
qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
NULL, NULL, NULL);
} else {
kvm_irqchip_remove_irqfd_notifier(kvm_state, &vector->interrupt,
vector->virq);
kvm_irqchip_release_virq(kvm_state, vector->virq);
vector->virq = -1;
} }
event_notifier_cleanup(&vector->interrupt);
vector->use = false;
} }
static void vfio_enable_msix(VFIODevice *vdev) static void vfio_enable_msix(VFIODevice *vdev)
...@@ -876,28 +921,28 @@ retry: ...@@ -876,28 +921,28 @@ retry:
VFIOMSIVector *vector = &vdev->msi_vectors[i]; VFIOMSIVector *vector = &vdev->msi_vectors[i];
vector->vdev = vdev; vector->vdev = vdev;
vector->virq = -1;
vector->use = true; vector->use = true;
if (event_notifier_init(&vector->interrupt, 0)) { if (event_notifier_init(&vector->interrupt, 0)) {
error_report("vfio: Error: event_notifier_init failed"); error_report("vfio: Error: event_notifier_init failed");
} }
qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
vfio_msi_interrupt, NULL, vector);
vector->msg = msi_get_message(&vdev->pdev, i); vector->msg = msi_get_message(&vdev->pdev, i);
/* /*
* Attempt to enable route through KVM irqchip, * Attempt to enable route through KVM irqchip,
* default to userspace handling if unavailable. * default to userspace handling if unavailable.
*/ */
vector->virq = VFIO_ALLOW_KVM_MSI ? vfio_add_kvm_msi_virq(vector, &vector->msg, false);
kvm_irqchip_add_msi_route(kvm_state, vector->msg) : -1;
if (vector->virq < 0 ||
kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
NULL, vector->virq) < 0) {
qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
vfio_msi_interrupt, NULL, vector);
}
} }
/* Set interrupt type prior to possible interrupts */
vdev->interrupt = VFIO_INT_MSI;
ret = vfio_enable_vectors(vdev, false); ret = vfio_enable_vectors(vdev, false);
if (ret) { if (ret) {
if (ret < 0) { if (ret < 0) {
...@@ -910,14 +955,10 @@ retry: ...@@ -910,14 +955,10 @@ retry:
for (i = 0; i < vdev->nr_vectors; i++) { for (i = 0; i < vdev->nr_vectors; i++) {
VFIOMSIVector *vector = &vdev->msi_vectors[i]; VFIOMSIVector *vector = &vdev->msi_vectors[i];
if (vector->virq >= 0) { if (vector->virq >= 0) {
kvm_irqchip_remove_irqfd_notifier(kvm_state, &vector->interrupt, vfio_remove_kvm_msi_virq(vector);
vector->virq);
kvm_irqchip_release_virq(kvm_state, vector->virq);
vector->virq = -1;
} else {
qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
NULL, NULL, NULL);
} }
qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
NULL, NULL, NULL);
event_notifier_cleanup(&vector->interrupt); event_notifier_cleanup(&vector->interrupt);
} }
...@@ -929,11 +970,17 @@ retry: ...@@ -929,11 +970,17 @@ retry:
} }
vdev->nr_vectors = 0; vdev->nr_vectors = 0;
/*
* Failing to setup MSI doesn't really fall within any specification.
* Let's try leaving interrupts disabled and hope the guest figures
* out to fall back to INTx for this device.
*/
error_report("vfio: Error: Failed to enable MSI");
vdev->interrupt = VFIO_INT_NONE;
return; return;
} }
vdev->interrupt = VFIO_INT_MSI;
DPRINTF("%s(%04x:%02x:%02x.%x) Enabled %d MSI vectors\n", __func__, DPRINTF("%s(%04x:%02x:%02x.%x) Enabled %d MSI vectors\n", __func__,
vdev->host.domain, vdev->host.bus, vdev->host.slot, vdev->host.domain, vdev->host.bus, vdev->host.slot,
vdev->host.function, vdev->nr_vectors); vdev->host.function, vdev->nr_vectors);
...@@ -941,6 +988,20 @@ retry: ...@@ -941,6 +988,20 @@ retry:
static void vfio_disable_msi_common(VFIODevice *vdev) static void vfio_disable_msi_common(VFIODevice *vdev)
{ {
int i;
for (i = 0; i < vdev->nr_vectors; i++) {
VFIOMSIVector *vector = &vdev->msi_vectors[i];
if (vdev->msi_vectors[i].use) {
if (vector->virq >= 0) {
vfio_remove_kvm_msi_virq(vector);
}
qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
NULL, NULL, NULL);
event_notifier_cleanup(&vector->interrupt);
}
}
g_free(vdev->msi_vectors); g_free(vdev->msi_vectors);
vdev->msi_vectors = NULL; vdev->msi_vectors = NULL;
vdev->nr_vectors = 0; vdev->nr_vectors = 0;
...@@ -962,6 +1023,7 @@ static void vfio_disable_msix(VFIODevice *vdev) ...@@ -962,6 +1023,7 @@ static void vfio_disable_msix(VFIODevice *vdev)
for (i = 0; i < vdev->nr_vectors; i++) { for (i = 0; i < vdev->nr_vectors; i++) {
if (vdev->msi_vectors[i].use) { if (vdev->msi_vectors[i].use) {
vfio_msix_vector_release(&vdev->pdev, i); vfio_msix_vector_release(&vdev->pdev, i);
msix_vector_unuse(&vdev->pdev, i);
} }
} }
...@@ -977,30 +1039,7 @@ static void vfio_disable_msix(VFIODevice *vdev) ...@@ -977,30 +1039,7 @@ static void vfio_disable_msix(VFIODevice *vdev)
static void vfio_disable_msi(VFIODevice *vdev) static void vfio_disable_msi(VFIODevice *vdev)
{ {
int i;
vfio_disable_irqindex(vdev, VFIO_PCI_MSI_IRQ_INDEX); vfio_disable_irqindex(vdev, VFIO_PCI_MSI_IRQ_INDEX);
for (i = 0; i < vdev->nr_vectors; i++) {
VFIOMSIVector *vector = &vdev->msi_vectors[i];
if (!vector->use) {
continue;
}
if (vector->virq >= 0) {
kvm_irqchip_remove_irqfd_notifier(kvm_state,
&vector->interrupt, vector->virq);
kvm_irqchip_release_virq(kvm_state, vector->virq);
vector->virq = -1;
} else {
qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
NULL, NULL, NULL);
}
event_notifier_cleanup(&vector->interrupt);
}
vfio_disable_msi_common(vdev); vfio_disable_msi_common(vdev);
DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain, DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain,
...@@ -1020,17 +1059,7 @@ static void vfio_update_msi(VFIODevice *vdev) ...@@ -1020,17 +1059,7 @@ static void vfio_update_msi(VFIODevice *vdev)
} }
msg = msi_get_message(&vdev->pdev, i); msg = msi_get_message(&vdev->pdev, i);
vfio_update_kvm_msi_virq(vector, msg);
if (msg.address != vector->msg.address ||
msg.data != vector->msg.data) {
DPRINTF("%s(%04x:%02x:%02x.%x) MSI vector %d changed\n",
__func__, vdev->host.domain, vdev->host.bus,
vdev->host.slot, vdev->host.function, i);
kvm_irqchip_update_msi_route(kvm_state, vector->virq, msg);
vector->msg = msg;
}
} }
} }
...@@ -1053,10 +1082,10 @@ static void vfio_bar_write(void *opaque, hwaddr addr, ...@@ -1053,10 +1082,10 @@ static void vfio_bar_write(void *opaque, hwaddr addr,
buf.byte = data; buf.byte = data;
break; break;
case 2: case 2:
buf.word = cpu_to_le16(data); buf.word = data;
break; break;
case 4: case 4:
buf.dword = cpu_to_le32(data); buf.dword = data;
break; break;
default: default:
hw_error("vfio: unsupported write size, %d bytes", size); hw_error("vfio: unsupported write size, %d bytes", size);
...@@ -1113,10 +1142,10 @@ static uint64_t vfio_bar_read(void *opaque, ...@@ -1113,10 +1142,10 @@ static uint64_t vfio_bar_read(void *opaque,
data = buf.byte; data = buf.byte;
break; break;
case 2: case 2:
data = le16_to_cpu(buf.word); data = buf.word;
break; break;
case 4: case 4:
data = le32_to_cpu(buf.dword); data = buf.dword;
break; break;
default: default:
hw_error("vfio: unsupported read size, %d bytes", size); hw_error("vfio: unsupported read size, %d bytes", size);
...@@ -1143,7 +1172,7 @@ static uint64_t vfio_bar_read(void *opaque, ...@@ -1143,7 +1172,7 @@ static uint64_t vfio_bar_read(void *opaque,
static const MemoryRegionOps vfio_bar_ops = { static const MemoryRegionOps vfio_bar_ops = {
.read = vfio_bar_read, .read = vfio_bar_read,
.write = vfio_bar_write, .write = vfio_bar_write,
.endianness = DEVICE_LITTLE_ENDIAN, .endianness = DEVICE_NATIVE_ENDIAN,
}; };
static void vfio_pci_load_rom(VFIODevice *vdev) static void vfio_pci_load_rom(VFIODevice *vdev)
...@@ -1205,21 +1234,42 @@ static void vfio_pci_load_rom(VFIODevice *vdev) ...@@ -1205,21 +1234,42 @@ static void vfio_pci_load_rom(VFIODevice *vdev)
static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size) static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size)
{ {
VFIODevice *vdev = opaque; VFIODevice *vdev = opaque;
uint64_t val = ((uint64_t)1 << (size * 8)) - 1; union {
uint8_t byte;
uint16_t word;
uint32_t dword;
uint64_t qword;
} buf;
uint64_t data = 0;
/* Load the ROM lazily when the guest tries to read it */ /* Load the ROM lazily when the guest tries to read it */
if (unlikely(!vdev->rom && !vdev->rom_read_failed)) { if (unlikely(!vdev->rom && !vdev->rom_read_failed)) {
vfio_pci_load_rom(vdev); vfio_pci_load_rom(vdev);
} }
memcpy(&val, vdev->rom + addr, memcpy(&buf, vdev->rom + addr,
(addr < vdev->rom_size) ? MIN(size, vdev->rom_size - addr) : 0); (addr < vdev->rom_size) ? MIN(size, vdev->rom_size - addr) : 0);
switch (size) {
case 1:
data = buf.byte;
break;
case 2:
data = buf.word;
break;
case 4:
data = buf.dword;
break;
default:
hw_error("vfio: unsupported read size, %d bytes", size);
break;
}
DPRINTF("%s(%04x:%02x:%02x.%x, 0x%"HWADDR_PRIx", 0x%x) = 0x%"PRIx64"\n", DPRINTF("%s(%04x:%02x:%02x.%x, 0x%"HWADDR_PRIx", 0x%x) = 0x%"PRIx64"\n",
__func__, vdev->host.domain, vdev->host.bus, vdev->host.slot, __func__, vdev->host.domain, vdev->host.bus, vdev->host.slot,
vdev->host.function, addr, size, val); vdev->host.function, addr, size, data);
return val; return data;
} }
static void vfio_rom_write(void *opaque, hwaddr addr, static void vfio_rom_write(void *opaque, hwaddr addr,
...@@ -1230,7 +1280,7 @@ static void vfio_rom_write(void *opaque, hwaddr addr, ...@@ -1230,7 +1280,7 @@ static void vfio_rom_write(void *opaque, hwaddr addr,
static const MemoryRegionOps vfio_rom_ops = { static const MemoryRegionOps vfio_rom_ops = {
.read = vfio_rom_read, .read = vfio_rom_read,
.write = vfio_rom_write, .write = vfio_rom_write,
.endianness = DEVICE_LITTLE_ENDIAN, .endianness = DEVICE_NATIVE_ENDIAN,
}; };
static bool vfio_blacklist_opt_rom(VFIODevice *vdev) static bool vfio_blacklist_opt_rom(VFIODevice *vdev)
...@@ -4012,7 +4062,7 @@ static void vfio_err_notifier_handler(void *opaque) ...@@ -4012,7 +4062,7 @@ static void vfio_err_notifier_handler(void *opaque)
__func__, vdev->host.domain, vdev->host.bus, __func__, vdev->host.domain, vdev->host.bus,
vdev->host.slot, vdev->host.function); vdev->host.slot, vdev->host.function);
vm_stop(RUN_STATE_IO_ERROR); vm_stop(RUN_STATE_INTERNAL_ERROR);
} }
/* /*
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册