diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index af7b204b921555ec980d64b09b26543d8932c890..d8c57636b9ce8750d128169e85b1c9ce3094219a 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -8,11 +8,17 @@ config VFIO_IOMMU_SPAPR_TCE depends on VFIO && SPAPR_TCE_IOMMU default n +config VFIO_SPAPR_EEH + tristate + depends on EEH && VFIO_IOMMU_SPAPR_TCE + default n + menuconfig VFIO tristate "VFIO Non-Privileged userspace driver framework" depends on IOMMU_API select VFIO_IOMMU_TYPE1 if X86 select VFIO_IOMMU_SPAPR_TCE if (PPC_POWERNV || PPC_PSERIES) + select VFIO_SPAPR_EEH if (PPC_POWERNV || PPC_PSERIES) select ANON_INODES help VFIO provides a framework for secure userspace device drivers. diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index 50e30bc75e855f6564bd10a35d88a4040b6ee3e7..0b035b12600a7ba6aeaa8d16367fdfbe4f78c9da 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@ -1,5 +1,5 @@ obj-$(CONFIG_VFIO) += vfio.o obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o -obj-$(CONFIG_EEH) += vfio_spapr_eeh.o +obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o obj-$(CONFIG_VFIO_PCI) += pci/ diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index e2ee80f36e3ea2d4a845e1e7b22ce47b070dc9c8..f7825332a3251b2001645b581eec09524562fbec 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -37,6 +37,10 @@ module_param_named(nointxmask, nointxmask, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(nointxmask, "Disable support for PCI 2.3 style INTx masking. If this resolves problems for specific devices, report lspci -vvvxxx to linux-pci@vger.kernel.org so the device can be fixed automatically via the broken_intx_masking flag."); +static DEFINE_MUTEX(driver_lock); + +static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev); + static int vfio_pci_enable(struct vfio_pci_device *vdev) { struct pci_dev *pdev = vdev->pdev; @@ -44,6 +48,9 @@ static int vfio_pci_enable(struct vfio_pci_device *vdev) u16 cmd; u8 msix_pos; + /* Don't allow our initial saved state to include busmaster */ + pci_clear_master(pdev); + ret = pci_enable_device(pdev); if (ret) return ret; @@ -99,7 +106,8 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) struct pci_dev *pdev = vdev->pdev; int bar; - pci_disable_device(pdev); + /* Stop the device from further DMA */ + pci_clear_master(pdev); vfio_pci_set_irqs_ioctl(vdev, VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER, @@ -117,6 +125,8 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) vdev->barmap[bar] = NULL; } + vdev->needs_reset = true; + /* * If we have saved state, restore it. If we can reset the device, * even better. Resetting with current state seems better than @@ -128,7 +138,7 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) __func__, dev_name(&pdev->dev)); if (!vdev->reset_works) - return; + goto out; pci_save_state(pdev); } @@ -148,46 +158,55 @@ static void vfio_pci_disable(struct vfio_pci_device *vdev) if (ret) pr_warn("%s: Failed to reset device %s (%d)\n", __func__, dev_name(&pdev->dev), ret); + else + vdev->needs_reset = false; } pci_restore_state(pdev); +out: + pci_disable_device(pdev); + + vfio_pci_try_bus_reset(vdev); } static void vfio_pci_release(void *device_data) { struct vfio_pci_device *vdev = device_data; - if (atomic_dec_and_test(&vdev->refcnt)) { + mutex_lock(&driver_lock); + + if (!(--vdev->refcnt)) { vfio_spapr_pci_eeh_release(vdev->pdev); vfio_pci_disable(vdev); } + mutex_unlock(&driver_lock); + module_put(THIS_MODULE); } static int vfio_pci_open(void *device_data) { struct vfio_pci_device *vdev = device_data; - int ret; + int ret = 0; if (!try_module_get(THIS_MODULE)) return -ENODEV; - if (atomic_inc_return(&vdev->refcnt) == 1) { + mutex_lock(&driver_lock); + + if (!vdev->refcnt) { ret = vfio_pci_enable(vdev); if (ret) goto error; - ret = vfio_spapr_pci_eeh_open(vdev->pdev); - if (ret) { - vfio_pci_disable(vdev); - goto error; - } + vfio_spapr_pci_eeh_open(vdev->pdev); } - - return 0; + vdev->refcnt++; error: - module_put(THIS_MODULE); + mutex_unlock(&driver_lock); + if (ret) + module_put(THIS_MODULE); return ret; } @@ -843,7 +862,6 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) vdev->irq_type = VFIO_PCI_NUM_IRQS; mutex_init(&vdev->igate); spin_lock_init(&vdev->irqlock); - atomic_set(&vdev->refcnt, 0); ret = vfio_add_group_dev(&pdev->dev, &vfio_pci_ops, vdev); if (ret) { @@ -858,12 +876,15 @@ static void vfio_pci_remove(struct pci_dev *pdev) { struct vfio_pci_device *vdev; + mutex_lock(&driver_lock); + vdev = vfio_del_group_dev(&pdev->dev); - if (!vdev) - return; + if (vdev) { + iommu_group_put(pdev->dev.iommu_group); + kfree(vdev); + } - iommu_group_put(pdev->dev.iommu_group); - kfree(vdev); + mutex_unlock(&driver_lock); } static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev, @@ -906,6 +927,110 @@ static struct pci_driver vfio_pci_driver = { .err_handler = &vfio_err_handlers, }; +/* + * Test whether a reset is necessary and possible. We mark devices as + * needs_reset when they are released, but don't have a function-local reset + * available. If any of these exist in the affected devices, we want to do + * a bus/slot reset. We also need all of the affected devices to be unused, + * so we abort if any device has a non-zero refcnt. driver_lock prevents a + * device from being opened during the scan or unbound from vfio-pci. + */ +static int vfio_pci_test_bus_reset(struct pci_dev *pdev, void *data) +{ + bool *needs_reset = data; + struct pci_driver *pci_drv = ACCESS_ONCE(pdev->driver); + int ret = -EBUSY; + + if (pci_drv == &vfio_pci_driver) { + struct vfio_device *device; + struct vfio_pci_device *vdev; + + device = vfio_device_get_from_dev(&pdev->dev); + if (!device) + return ret; + + vdev = vfio_device_data(device); + if (vdev) { + if (vdev->needs_reset) + *needs_reset = true; + + if (!vdev->refcnt) + ret = 0; + } + + vfio_device_put(device); + } + + /* + * TODO: vfio-core considers groups to be viable even if some devices + * are attached to known drivers, like pci-stub or pcieport. We can't + * freeze devices from being unbound to those drivers like we can + * here though, so it would be racy to test for them. We also can't + * use device_lock() to prevent changes as that would interfere with + * PCI-core taking device_lock during bus reset. For now, we require + * devices to be bound to vfio-pci to get a bus/slot reset on release. + */ + + return ret; +} + +/* Clear needs_reset on all affected devices after successful bus/slot reset */ +static int vfio_pci_clear_needs_reset(struct pci_dev *pdev, void *data) +{ + struct pci_driver *pci_drv = ACCESS_ONCE(pdev->driver); + + if (pci_drv == &vfio_pci_driver) { + struct vfio_device *device; + struct vfio_pci_device *vdev; + + device = vfio_device_get_from_dev(&pdev->dev); + if (!device) + return 0; + + vdev = vfio_device_data(device); + if (vdev) + vdev->needs_reset = false; + + vfio_device_put(device); + } + + return 0; +} + +/* + * Attempt to do a bus/slot reset if there are devices affected by a reset for + * this device that are needs_reset and all of the affected devices are unused + * (!refcnt). Callers of this function are required to hold driver_lock such + * that devices can not be unbound from vfio-pci or opened by a user while we + * test for and perform a bus/slot reset. + */ +static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev) +{ + bool needs_reset = false, slot = false; + int ret; + + if (!pci_probe_reset_slot(vdev->pdev->slot)) + slot = true; + else if (pci_probe_reset_bus(vdev->pdev->bus)) + return; + + if (vfio_pci_for_each_slot_or_bus(vdev->pdev, + vfio_pci_test_bus_reset, + &needs_reset, slot) || !needs_reset) + return; + + if (slot) + ret = pci_try_reset_slot(vdev->pdev->slot); + else + ret = pci_try_reset_bus(vdev->pdev->bus); + + if (ret) + return; + + vfio_pci_for_each_slot_or_bus(vdev->pdev, + vfio_pci_clear_needs_reset, NULL, slot); +} + static void __exit vfio_pci_cleanup(void) { pci_unregister_driver(&vfio_pci_driver); diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index 9c6d5d0f3b02db8cfb1acdd418b4a5abb44e12e5..671c17a6e6d029dfdffe5d7243150ed757e44cf4 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h @@ -54,8 +54,9 @@ struct vfio_pci_device { bool extended_caps; bool bardirty; bool has_vga; + bool needs_reset; struct pci_saved_state *pci_saved_state; - atomic_t refcnt; + int refcnt; struct eventfd_ctx *err_trigger; }; diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c index f834b4ce1431b133c1fa5aa6c7f6187c420a0ad8..86dfceb9201f2503e2050dc71c8906fcd8abf281 100644 --- a/drivers/vfio/vfio_spapr_eeh.c +++ b/drivers/vfio/vfio_spapr_eeh.c @@ -9,20 +9,27 @@ * published by the Free Software Foundation. */ +#include #include #include #include +#define DRIVER_VERSION "0.1" +#define DRIVER_AUTHOR "Gavin Shan, IBM Corporation" +#define DRIVER_DESC "VFIO IOMMU SPAPR EEH" + /* We might build address mapping here for "fast" path later */ -int vfio_spapr_pci_eeh_open(struct pci_dev *pdev) +void vfio_spapr_pci_eeh_open(struct pci_dev *pdev) { - return eeh_dev_open(pdev); + eeh_dev_open(pdev); } +EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_open); void vfio_spapr_pci_eeh_release(struct pci_dev *pdev) { eeh_dev_release(pdev); } +EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_release); long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, unsigned int cmd, unsigned long arg) @@ -85,3 +92,9 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, return ret; } +EXPORT_SYMBOL(vfio_spapr_iommu_eeh_ioctl); + +MODULE_VERSION(DRIVER_VERSION); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/include/linux/vfio.h b/include/linux/vfio.h index 25a0fbd4b998f7572b8d8d705faf761b5c20b5fe..d3204115f15d21dd7ef3d879df2393884795b037 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -98,16 +98,16 @@ extern int vfio_external_user_iommu_id(struct vfio_group *group); extern long vfio_external_check_extension(struct vfio_group *group, unsigned long arg); +struct pci_dev; #ifdef CONFIG_EEH -extern int vfio_spapr_pci_eeh_open(struct pci_dev *pdev); +extern void vfio_spapr_pci_eeh_open(struct pci_dev *pdev); extern void vfio_spapr_pci_eeh_release(struct pci_dev *pdev); extern long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, unsigned int cmd, unsigned long arg); #else -static inline int vfio_spapr_pci_eeh_open(struct pci_dev *pdev) +static inline void vfio_spapr_pci_eeh_open(struct pci_dev *pdev) { - return 0; } static inline void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)