提交 da76349c 编写于 作者: J Jacob Pan 提交者: Zheng Zengkai

iommu: handle page response timeout

maillist inclusion
category: feature
bugzilla: 51855
CVE: NA

Reference: https://jpbrucker.net/git/linux/commit/?h=sva/2021-03-01&id=9a1e957fd072d0e993827e428367947d915da382

---------------------------------------------

When IO page faults are reported outside IOMMU subsystem, the page
request handler may fail for various reasons. E.g. a guest received
page requests but did not have a chance to run for a long time. The
irresponsive behavior could hold off limited resources on the pending
device.
There can be hardware or credit based software solutions as suggested
in the PCI ATS Ch-4. To provide a basic safety net this patch
introduces a per device deferrable timer which monitors the longest
pending page fault that requires a response. Proper action such as
sending failure response code could be taken when timer expires but not
included in this patch. We need to consider the life cycle of page
groupd ID to prevent confusion with reused group ID by a device.
For now, a warning message provides clue of such failure.
Signed-off-by: NJacob Pan <jacob.jun.pan@linux.intel.com>
Signed-off-by: NAshok Raj <ashok.raj@intel.com>
Signed-off-by: NLijun Fang <fanglijun3@huawei.com>
Reviewed-by: NWeilong Chen <chenweilong@huawei.com>
Signed-off-by: NZheng Zengkai <zhengzengkai@huawei.com>
上级 75c5a92e
...@@ -1070,6 +1070,39 @@ int iommu_group_unregister_notifier(struct iommu_group *group, ...@@ -1070,6 +1070,39 @@ int iommu_group_unregister_notifier(struct iommu_group *group,
} }
EXPORT_SYMBOL_GPL(iommu_group_unregister_notifier); EXPORT_SYMBOL_GPL(iommu_group_unregister_notifier);
static void iommu_dev_fault_timer_fn(struct timer_list *t)
{
struct iommu_fault_param *fparam = from_timer(fparam, t, timer);
struct iommu_fault_event *evt;
struct iommu_fault_page_request *prm;
u64 now;
now = get_jiffies_64();
/* The goal is to ensure driver or guest page fault handler(via vfio)
* send page response on time. Otherwise, limited queue resources
* may be occupied by some irresponsive guests or drivers.
* When per device pending fault list is not empty, we periodically checks
* if any anticipated page response time has expired.
*
* TODO:
* We could do the following if response time expires:
* 1. send page response code FAILURE to all pending PRQ
* 2. inform device driver or vfio
* 3. drain in-flight page requests and responses for this device
* 4. clear pending fault list such that driver can unregister fault
* handler(otherwise blocked when pending faults are present).
*/
list_for_each_entry(evt, &fparam->faults, list) {
prm = &evt->fault.prm;
if (time_after64(now, evt->expire))
pr_err("Page response time expired!, pasid %d gid %d exp %llu now %llu\n",
prm->pasid, prm->grpid, evt->expire, now);
}
mod_timer(t, now + prq_timeout);
}
/** /**
* iommu_register_device_fault_handler() - Register a device fault handler * iommu_register_device_fault_handler() - Register a device fault handler
* @dev: the device * @dev: the device
...@@ -1117,6 +1150,9 @@ int iommu_register_device_fault_handler(struct device *dev, ...@@ -1117,6 +1150,9 @@ int iommu_register_device_fault_handler(struct device *dev,
mutex_init(&param->fault_param->lock); mutex_init(&param->fault_param->lock);
INIT_LIST_HEAD(&param->fault_param->faults); INIT_LIST_HEAD(&param->fault_param->faults);
if (prq_timeout)
timer_setup(&param->fault_param->timer, iommu_dev_fault_timer_fn,
TIMER_DEFERRABLE);
done_unlock: done_unlock:
mutex_unlock(&param->lock); mutex_unlock(&param->lock);
...@@ -1178,7 +1214,9 @@ int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) ...@@ -1178,7 +1214,9 @@ int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt)
struct dev_iommu *param = dev->iommu; struct dev_iommu *param = dev->iommu;
struct iommu_fault_event *evt_pending = NULL; struct iommu_fault_event *evt_pending = NULL;
struct iommu_fault_param *fparam; struct iommu_fault_param *fparam;
struct timer_list *tmr;
int ret = 0; int ret = 0;
u64 exp;
if (!param || !evt) if (!param || !evt)
return -EINVAL; return -EINVAL;
...@@ -1199,7 +1237,17 @@ int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) ...@@ -1199,7 +1237,17 @@ int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt)
ret = -ENOMEM; ret = -ENOMEM;
goto done_unlock; goto done_unlock;
} }
/* Keep track of response expiration time */
exp = get_jiffies_64() + prq_timeout;
evt_pending->expire = exp;
mutex_lock(&fparam->lock); mutex_lock(&fparam->lock);
if (list_empty(&fparam->faults)) {
/* First pending event, start timer */
tmr = &fparam->timer;
WARN_ON(timer_pending(tmr));
mod_timer(tmr, exp);
}
list_add_tail(&evt_pending->list, &fparam->faults); list_add_tail(&evt_pending->list, &fparam->faults);
mutex_unlock(&fparam->lock); mutex_unlock(&fparam->lock);
} }
...@@ -1275,6 +1323,13 @@ int iommu_page_response(struct device *dev, ...@@ -1275,6 +1323,13 @@ int iommu_page_response(struct device *dev,
break; break;
} }
/* stop response timer if no more pending request */
if (list_empty(&param->fault_param->faults) &&
timer_pending(&param->fault_param->timer)) {
pr_debug("no pending PRQ, stop timer\n");
del_timer(&param->fault_param->timer);
}
done_unlock: done_unlock:
mutex_unlock(&param->fault_param->lock); mutex_unlock(&param->fault_param->lock);
return ret; return ret;
......
...@@ -346,10 +346,12 @@ struct iommu_device { ...@@ -346,10 +346,12 @@ struct iommu_device {
* *
* @fault: fault descriptor * @fault: fault descriptor
* @list: pending fault event list, used for tracking responses * @list: pending fault event list, used for tracking responses
* @expire: time limit in jiffies will wait for page response
*/ */
struct iommu_fault_event { struct iommu_fault_event {
struct iommu_fault fault; struct iommu_fault fault;
struct list_head list; struct list_head list;
u64 expire;
}; };
/** /**
...@@ -358,11 +360,13 @@ struct iommu_fault_event { ...@@ -358,11 +360,13 @@ struct iommu_fault_event {
* @data: handler private data * @data: handler private data
* @faults: holds the pending faults which needs response * @faults: holds the pending faults which needs response
* @lock: protect pending faults list * @lock: protect pending faults list
* @timer: track page request pending time limit
*/ */
struct iommu_fault_param { struct iommu_fault_param {
iommu_dev_fault_handler_t handler; iommu_dev_fault_handler_t handler;
void *data; void *data;
struct list_head faults; struct list_head faults;
struct timer_list timer;
struct mutex lock; struct mutex lock;
}; };
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册