提交 27a9716b 作者: Linus Torvalds

Merge tag 'vfio-v3.18-rc1' of git://github.com/awilliam/linux-vfio

Pull VFIO updates from Alex Williamson:
 - Nested IOMMU extension to type1 (Will Deacon)
 - Restore MSIx message before enabling (Gavin Shan)
 - Fix remove path locking (Alex Williamson)

* tag 'vfio-v3.18-rc1' of git://github.com/awilliam/linux-vfio:
  vfio-pci: Fix remove path locking
  drivers/vfio: Export vfio_spapr_iommu_eeh_ioctl() with GPL
  vfio/pci: Restore MSIx message prior to enabling
  PCI: Export MSI message relevant functions
  vfio/iommu_type1: add new VFIO_TYPE1_NESTING_IOMMU IOMMU type
  iommu: introduce domain attribute for nesting IOMMUs
...@@ -302,6 +302,7 @@ void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg) ...@@ -302,6 +302,7 @@ void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg)
__get_cached_msi_msg(entry, msg); __get_cached_msi_msg(entry, msg);
} }
EXPORT_SYMBOL_GPL(get_cached_msi_msg);
void __write_msi_msg(struct msi_desc *entry, struct msi_msg *msg) void __write_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
{ {
...@@ -346,6 +347,7 @@ void write_msi_msg(unsigned int irq, struct msi_msg *msg) ...@@ -346,6 +347,7 @@ void write_msi_msg(unsigned int irq, struct msi_msg *msg)
__write_msi_msg(entry, msg); __write_msi_msg(entry, msg);
} }
EXPORT_SYMBOL_GPL(write_msi_msg);
static void free_msi_irqs(struct pci_dev *dev) static void free_msi_irqs(struct pci_dev *dev)
{ {
......
...@@ -876,15 +876,11 @@ static void vfio_pci_remove(struct pci_dev *pdev) ...@@ -876,15 +876,11 @@ static void vfio_pci_remove(struct pci_dev *pdev)
{ {
struct vfio_pci_device *vdev; struct vfio_pci_device *vdev;
mutex_lock(&driver_lock);
vdev = vfio_del_group_dev(&pdev->dev); vdev = vfio_del_group_dev(&pdev->dev);
if (vdev) { if (vdev) {
iommu_group_put(pdev->dev.iommu_group); iommu_group_put(pdev->dev.iommu_group);
kfree(vdev); kfree(vdev);
} }
mutex_unlock(&driver_lock);
} }
static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev, static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
...@@ -927,108 +923,90 @@ static struct pci_driver vfio_pci_driver = { ...@@ -927,108 +923,90 @@ static struct pci_driver vfio_pci_driver = {
.err_handler = &vfio_err_handlers, .err_handler = &vfio_err_handlers,
}; };
/* struct vfio_devices {
* Test whether a reset is necessary and possible. We mark devices as struct vfio_device **devices;
* needs_reset when they are released, but don't have a function-local reset int cur_index;
* available. If any of these exist in the affected devices, we want to do int max_index;
* a bus/slot reset. We also need all of the affected devices to be unused, };
* so we abort if any device has a non-zero refcnt. driver_lock prevents a
* device from being opened during the scan or unbound from vfio-pci.
*/
static int vfio_pci_test_bus_reset(struct pci_dev *pdev, void *data)
{
bool *needs_reset = data;
struct pci_driver *pci_drv = ACCESS_ONCE(pdev->driver);
int ret = -EBUSY;
if (pci_drv == &vfio_pci_driver) {
struct vfio_device *device;
struct vfio_pci_device *vdev;
device = vfio_device_get_from_dev(&pdev->dev);
if (!device)
return ret;
vdev = vfio_device_data(device);
if (vdev) {
if (vdev->needs_reset)
*needs_reset = true;
if (!vdev->refcnt)
ret = 0;
}
vfio_device_put(device);
}
/*
* TODO: vfio-core considers groups to be viable even if some devices
* are attached to known drivers, like pci-stub or pcieport. We can't
* freeze devices from being unbound to those drivers like we can
* here though, so it would be racy to test for them. We also can't
* use device_lock() to prevent changes as that would interfere with
* PCI-core taking device_lock during bus reset. For now, we require
* devices to be bound to vfio-pci to get a bus/slot reset on release.
*/
return ret;
}
/* Clear needs_reset on all affected devices after successful bus/slot reset */ static int vfio_pci_get_devs(struct pci_dev *pdev, void *data)
static int vfio_pci_clear_needs_reset(struct pci_dev *pdev, void *data)
{ {
struct vfio_devices *devs = data;
struct pci_driver *pci_drv = ACCESS_ONCE(pdev->driver); struct pci_driver *pci_drv = ACCESS_ONCE(pdev->driver);
if (pci_drv == &vfio_pci_driver) { if (pci_drv != &vfio_pci_driver)
struct vfio_device *device; return -EBUSY;
struct vfio_pci_device *vdev;
device = vfio_device_get_from_dev(&pdev->dev); if (devs->cur_index == devs->max_index)
if (!device) return -ENOSPC;
return 0;
vdev = vfio_device_data(device); devs->devices[devs->cur_index] = vfio_device_get_from_dev(&pdev->dev);
if (vdev) if (!devs->devices[devs->cur_index])
vdev->needs_reset = false; return -EINVAL;
vfio_device_put(device);
}
devs->cur_index++;
return 0; return 0;
} }
/* /*
* Attempt to do a bus/slot reset if there are devices affected by a reset for * Attempt to do a bus/slot reset if there are devices affected by a reset for
* this device that are needs_reset and all of the affected devices are unused * this device that are needs_reset and all of the affected devices are unused
* (!refcnt). Callers of this function are required to hold driver_lock such * (!refcnt). Callers are required to hold driver_lock when calling this to
* that devices can not be unbound from vfio-pci or opened by a user while we * prevent device opens and concurrent bus reset attempts. We prevent device
* test for and perform a bus/slot reset. * unbinds by acquiring and holding a reference to the vfio_device.
*
* NB: vfio-core considers a group to be viable even if some devices are
* bound to drivers like pci-stub or pcieport. Here we require all devices
* to be bound to vfio_pci since that's the only way we can be sure they
* stay put.
*/ */
static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev) static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev)
{ {
struct vfio_devices devs = { .cur_index = 0 };
int i = 0, ret = -EINVAL;
bool needs_reset = false, slot = false; bool needs_reset = false, slot = false;
int ret; struct vfio_pci_device *tmp;
if (!pci_probe_reset_slot(vdev->pdev->slot)) if (!pci_probe_reset_slot(vdev->pdev->slot))
slot = true; slot = true;
else if (pci_probe_reset_bus(vdev->pdev->bus)) else if (pci_probe_reset_bus(vdev->pdev->bus))
return; return;
if (vfio_pci_for_each_slot_or_bus(vdev->pdev, if (vfio_pci_for_each_slot_or_bus(vdev->pdev, vfio_pci_count_devs,
vfio_pci_test_bus_reset, &i, slot) || !i)
&needs_reset, slot) || !needs_reset)
return; return;
if (slot) devs.max_index = i;
ret = pci_try_reset_slot(vdev->pdev->slot); devs.devices = kcalloc(i, sizeof(struct vfio_device *), GFP_KERNEL);
else if (!devs.devices)
ret = pci_try_reset_bus(vdev->pdev->bus);
if (ret)
return; return;
vfio_pci_for_each_slot_or_bus(vdev->pdev, if (vfio_pci_for_each_slot_or_bus(vdev->pdev,
vfio_pci_clear_needs_reset, NULL, slot); vfio_pci_get_devs, &devs, slot))
goto put_devs;
for (i = 0; i < devs.cur_index; i++) {
tmp = vfio_device_data(devs.devices[i]);
if (tmp->needs_reset)
needs_reset = true;
if (tmp->refcnt)
goto put_devs;
}
if (needs_reset)
ret = slot ? pci_try_reset_slot(vdev->pdev->slot) :
pci_try_reset_bus(vdev->pdev->bus);
put_devs:
for (i = 0; i < devs.cur_index; i++) {
if (!ret) {
tmp = vfio_device_data(devs.devices[i]);
tmp->needs_reset = false;
}
vfio_device_put(devs.devices[i]);
}
kfree(devs.devices);
} }
static void __exit vfio_pci_cleanup(void) static void __exit vfio_pci_cleanup(void)
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
#include <linux/device.h> #include <linux/device.h>
#include <linux/interrupt.h> #include <linux/interrupt.h>
#include <linux/eventfd.h> #include <linux/eventfd.h>
#include <linux/msi.h>
#include <linux/pci.h> #include <linux/pci.h>
#include <linux/file.h> #include <linux/file.h>
#include <linux/poll.h> #include <linux/poll.h>
...@@ -548,6 +549,20 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev, ...@@ -548,6 +549,20 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_device *vdev,
return PTR_ERR(trigger); return PTR_ERR(trigger);
} }
/*
* The MSIx vector table resides in device memory which may be cleared
* via backdoor resets. We don't allow direct access to the vector
* table so even if a userspace driver attempts to save/restore around
* such a reset it would be unsuccessful. To avoid this, restore the
* cached value of the message prior to enabling.
*/
if (msix) {
struct msi_msg msg;
get_cached_msi_msg(irq, &msg);
write_msi_msg(irq, &msg);
}
ret = request_irq(irq, vfio_msihandler, 0, ret = request_irq(irq, vfio_msihandler, 0,
vdev->ctx[vector].name, trigger); vdev->ctx[vector].name, trigger);
if (ret) { if (ret) {
......
...@@ -57,7 +57,8 @@ struct vfio_iommu { ...@@ -57,7 +57,8 @@ struct vfio_iommu {
struct list_head domain_list; struct list_head domain_list;
struct mutex lock; struct mutex lock;
struct rb_root dma_list; struct rb_root dma_list;
bool v2; bool v2;
bool nesting;
}; };
struct vfio_domain { struct vfio_domain {
...@@ -705,6 +706,15 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, ...@@ -705,6 +706,15 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
goto out_free; goto out_free;
} }
if (iommu->nesting) {
int attr = 1;
ret = iommu_domain_set_attr(domain->domain, DOMAIN_ATTR_NESTING,
&attr);
if (ret)
goto out_domain;
}
ret = iommu_attach_group(domain->domain, iommu_group); ret = iommu_attach_group(domain->domain, iommu_group);
if (ret) if (ret)
goto out_domain; goto out_domain;
...@@ -819,17 +829,26 @@ static void *vfio_iommu_type1_open(unsigned long arg) ...@@ -819,17 +829,26 @@ static void *vfio_iommu_type1_open(unsigned long arg)
{ {
struct vfio_iommu *iommu; struct vfio_iommu *iommu;
if (arg != VFIO_TYPE1_IOMMU && arg != VFIO_TYPE1v2_IOMMU)
return ERR_PTR(-EINVAL);
iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
if (!iommu) if (!iommu)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
switch (arg) {
case VFIO_TYPE1_IOMMU:
break;
case VFIO_TYPE1_NESTING_IOMMU:
iommu->nesting = true;
case VFIO_TYPE1v2_IOMMU:
iommu->v2 = true;
break;
default:
kfree(iommu);
return ERR_PTR(-EINVAL);
}
INIT_LIST_HEAD(&iommu->domain_list); INIT_LIST_HEAD(&iommu->domain_list);
iommu->dma_list = RB_ROOT; iommu->dma_list = RB_ROOT;
mutex_init(&iommu->lock); mutex_init(&iommu->lock);
iommu->v2 = (arg == VFIO_TYPE1v2_IOMMU);
return iommu; return iommu;
} }
...@@ -885,6 +904,7 @@ static long vfio_iommu_type1_ioctl(void *iommu_data, ...@@ -885,6 +904,7 @@ static long vfio_iommu_type1_ioctl(void *iommu_data,
switch (arg) { switch (arg) {
case VFIO_TYPE1_IOMMU: case VFIO_TYPE1_IOMMU:
case VFIO_TYPE1v2_IOMMU: case VFIO_TYPE1v2_IOMMU:
case VFIO_TYPE1_NESTING_IOMMU:
return 1; return 1;
case VFIO_DMA_CC_IOMMU: case VFIO_DMA_CC_IOMMU:
if (!iommu) if (!iommu)
......
...@@ -92,7 +92,7 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, ...@@ -92,7 +92,7 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
return ret; return ret;
} }
EXPORT_SYMBOL(vfio_spapr_iommu_eeh_ioctl); EXPORT_SYMBOL_GPL(vfio_spapr_iommu_eeh_ioctl);
MODULE_VERSION(DRIVER_VERSION); MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2"); MODULE_LICENSE("GPL v2");
......
...@@ -80,6 +80,7 @@ enum iommu_attr { ...@@ -80,6 +80,7 @@ enum iommu_attr {
DOMAIN_ATTR_FSL_PAMU_STASH, DOMAIN_ATTR_FSL_PAMU_STASH,
DOMAIN_ATTR_FSL_PAMU_ENABLE, DOMAIN_ATTR_FSL_PAMU_ENABLE,
DOMAIN_ATTR_FSL_PAMUV1, DOMAIN_ATTR_FSL_PAMUV1,
DOMAIN_ATTR_NESTING, /* two stages of translation */
DOMAIN_ATTR_MAX, DOMAIN_ATTR_MAX,
}; };
......
...@@ -33,6 +33,9 @@ ...@@ -33,6 +33,9 @@
/* Check if EEH is supported */ /* Check if EEH is supported */
#define VFIO_EEH 5 #define VFIO_EEH 5
/* Two-stage IOMMU */
#define VFIO_TYPE1_NESTING_IOMMU 6 /* Implies v2 */
/* /*
* The IOCTL interface is designed for extensibility by embedding the * The IOCTL interface is designed for extensibility by embedding the
* structure length (argsz) and flags into structures passed between * structure length (argsz) and flags into structures passed between
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册