diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index fdaf1964eca1fc001933feae05361fab26daf39f..e3f5a2337517774220499c0ca51fe2de6b9de00f 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1070,6 +1070,39 @@ int iommu_group_unregister_notifier(struct iommu_group *group, } EXPORT_SYMBOL_GPL(iommu_group_unregister_notifier); +static void iommu_dev_fault_timer_fn(struct timer_list *t) +{ + struct iommu_fault_param *fparam = from_timer(fparam, t, timer); + struct iommu_fault_event *evt; + struct iommu_fault_page_request *prm; + + u64 now; + + now = get_jiffies_64(); + + /* The goal is to ensure driver or guest page fault handler(via vfio) + * send page response on time. Otherwise, limited queue resources + * may be occupied by some irresponsive guests or drivers. + * When per device pending fault list is not empty, we periodically checks + * if any anticipated page response time has expired. + * + * TODO: + * We could do the following if response time expires: + * 1. send page response code FAILURE to all pending PRQ + * 2. inform device driver or vfio + * 3. drain in-flight page requests and responses for this device + * 4. clear pending fault list such that driver can unregister fault + * handler(otherwise blocked when pending faults are present). + */ + list_for_each_entry(evt, &fparam->faults, list) { + prm = &evt->fault.prm; + if (time_after64(now, evt->expire)) + pr_err("Page response time expired!, pasid %d gid %d exp %llu now %llu\n", + prm->pasid, prm->grpid, evt->expire, now); + } + mod_timer(t, now + prq_timeout); +} + /** * iommu_register_device_fault_handler() - Register a device fault handler * @dev: the device @@ -1117,6 +1150,9 @@ int iommu_register_device_fault_handler(struct device *dev, mutex_init(¶m->fault_param->lock); INIT_LIST_HEAD(¶m->fault_param->faults); + if (prq_timeout) + timer_setup(¶m->fault_param->timer, iommu_dev_fault_timer_fn, + TIMER_DEFERRABLE); done_unlock: mutex_unlock(¶m->lock); @@ -1178,7 +1214,9 @@ int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) struct dev_iommu *param = dev->iommu; struct iommu_fault_event *evt_pending = NULL; struct iommu_fault_param *fparam; + struct timer_list *tmr; int ret = 0; + u64 exp; if (!param || !evt) return -EINVAL; @@ -1199,7 +1237,17 @@ int iommu_report_device_fault(struct device *dev, struct iommu_fault_event *evt) ret = -ENOMEM; goto done_unlock; } + /* Keep track of response expiration time */ + exp = get_jiffies_64() + prq_timeout; + evt_pending->expire = exp; mutex_lock(&fparam->lock); + if (list_empty(&fparam->faults)) { + /* First pending event, start timer */ + tmr = &fparam->timer; + WARN_ON(timer_pending(tmr)); + mod_timer(tmr, exp); + } + list_add_tail(&evt_pending->list, &fparam->faults); mutex_unlock(&fparam->lock); } @@ -1275,6 +1323,13 @@ int iommu_page_response(struct device *dev, break; } + /* stop response timer if no more pending request */ + if (list_empty(¶m->fault_param->faults) && + timer_pending(¶m->fault_param->timer)) { + pr_debug("no pending PRQ, stop timer\n"); + del_timer(¶m->fault_param->timer); + } + done_unlock: mutex_unlock(¶m->fault_param->lock); return ret; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 0b4897cceedcec198710e51934f269298ddd12b1..aa453c9dd032d67e7a06e89307806ab719cd12e8 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -346,10 +346,12 @@ struct iommu_device { * * @fault: fault descriptor * @list: pending fault event list, used for tracking responses + * @expire: time limit in jiffies will wait for page response */ struct iommu_fault_event { struct iommu_fault fault; struct list_head list; + u64 expire; }; /** @@ -358,11 +360,13 @@ struct iommu_fault_event { * @data: handler private data * @faults: holds the pending faults which needs response * @lock: protect pending faults list + * @timer: track page request pending time limit */ struct iommu_fault_param { iommu_dev_fault_handler_t handler; void *data; struct list_head faults; + struct timer_list timer; struct mutex lock; };