提交 5cec7537 编写于 作者: David Woodhouse

iommu/vt-d: Implement SVM_FLAG_SUPERVISOR_MODE for kernel access

This is only usable for the static 1:1 mapping of physical memory.

Any access to vmalloc or module regions will require some way of doing
an IOTLB flush. It's theoretically possible to hook into the
tlb_flush_kernel_range() function, but that seems like overkill — most
of the addresses accessed through a kernel PASID *will* be in the 1:1
mapping.

If we really need to allow access to more interesting kernel regions,
then the answer will probably be an explicit IOTLB flush call after use,
akin to the DMA API's unmap function.

In fact, it might be worth introducing that sooner rather than later, and
making it just BUG() if the address isn't in the static 1:1 mapping.
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
上级 569e4f77
...@@ -269,11 +269,10 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ ...@@ -269,11 +269,10 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
struct intel_iommu *iommu = intel_svm_device_to_iommu(dev); struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
struct intel_svm_dev *sdev; struct intel_svm_dev *sdev;
struct intel_svm *svm = NULL; struct intel_svm *svm = NULL;
struct mm_struct *mm = NULL;
int pasid_max; int pasid_max;
int ret; int ret;
BUG_ON(pasid && !current->mm);
if (WARN_ON(!iommu)) if (WARN_ON(!iommu))
return -EINVAL; return -EINVAL;
...@@ -284,12 +283,20 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ ...@@ -284,12 +283,20 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
} else } else
pasid_max = 1 << 20; pasid_max = 1 << 20;
if ((flags & SVM_FLAG_SUPERVISOR_MODE)) {
if (!ecap_srs(iommu->ecap))
return -EINVAL;
} else if (pasid) {
mm = get_task_mm(current);
BUG_ON(!mm);
}
mutex_lock(&pasid_mutex); mutex_lock(&pasid_mutex);
if (pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) { if (pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) {
int i; int i;
idr_for_each_entry(&iommu->pasid_idr, svm, i) { idr_for_each_entry(&iommu->pasid_idr, svm, i) {
if (svm->mm != current->mm || if (svm->mm != mm ||
(svm->flags & SVM_FLAG_PRIVATE_PASID)) (svm->flags & SVM_FLAG_PRIVATE_PASID))
continue; continue;
...@@ -355,17 +362,22 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ ...@@ -355,17 +362,22 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
} }
svm->pasid = ret; svm->pasid = ret;
svm->notifier.ops = &intel_mmuops; svm->notifier.ops = &intel_mmuops;
svm->mm = get_task_mm(current); svm->mm = mm;
svm->flags = flags; svm->flags = flags;
INIT_LIST_HEAD_RCU(&svm->devs); INIT_LIST_HEAD_RCU(&svm->devs);
ret = -ENOMEM; ret = -ENOMEM;
if (!svm->mm || (ret = mmu_notifier_register(&svm->notifier, svm->mm))) { if (mm) {
ret = mmu_notifier_register(&svm->notifier, mm);
if (ret) {
idr_remove(&svm->iommu->pasid_idr, svm->pasid); idr_remove(&svm->iommu->pasid_idr, svm->pasid);
kfree(svm); kfree(svm);
kfree(sdev); kfree(sdev);
goto out; goto out;
} }
iommu->pasid_table[svm->pasid].val = (u64)__pa(svm->mm->pgd) | 1; iommu->pasid_table[svm->pasid].val = (u64)__pa(mm->pgd) | 1;
mm = NULL;
} else
iommu->pasid_table[svm->pasid].val = (u64)__pa(init_mm.pgd) | 1 | (1ULL << 11);
wmb(); wmb();
} }
list_add_rcu(&sdev->list, &svm->devs); list_add_rcu(&sdev->list, &svm->devs);
...@@ -375,6 +387,8 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ ...@@ -375,6 +387,8 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_
ret = 0; ret = 0;
out: out:
mutex_unlock(&pasid_mutex); mutex_unlock(&pasid_mutex);
if (mm)
mmput(mm);
return ret; return ret;
} }
EXPORT_SYMBOL_GPL(intel_svm_bind_mm); EXPORT_SYMBOL_GPL(intel_svm_bind_mm);
...@@ -416,6 +430,7 @@ int intel_svm_unbind_mm(struct device *dev, int pasid) ...@@ -416,6 +430,7 @@ int intel_svm_unbind_mm(struct device *dev, int pasid)
mmu_notifier_unregister(&svm->notifier, svm->mm); mmu_notifier_unregister(&svm->notifier, svm->mm);
idr_remove(&svm->iommu->pasid_idr, svm->pasid); idr_remove(&svm->iommu->pasid_idr, svm->pasid);
if (svm->mm)
mmput(svm->mm); mmput(svm->mm);
/* We mandate that no page faults may be outstanding /* We mandate that no page faults may be outstanding
* for the PASID when intel_svm_unbind_mm() is called. * for the PASID when intel_svm_unbind_mm() is called.
...@@ -500,6 +515,10 @@ static irqreturn_t prq_event_thread(int irq, void *d) ...@@ -500,6 +515,10 @@ static irqreturn_t prq_event_thread(int irq, void *d)
} }
result = QI_RESP_INVALID; result = QI_RESP_INVALID;
/* Since we're using init_mm.pgd directly, we should never take
* any faults on kernel addresses. */
if (!svm->mm)
goto bad_req;
down_read(&svm->mm->mmap_sem); down_read(&svm->mm->mmap_sem);
vma = find_extend_vma(svm->mm, address); vma = find_extend_vma(svm->mm, address);
if (!vma || address < vma->vm_start) if (!vma || address < vma->vm_start)
......
...@@ -42,6 +42,19 @@ struct svm_dev_ops { ...@@ -42,6 +42,19 @@ struct svm_dev_ops {
*/ */
#define SVM_FLAG_PRIVATE_PASID (1<<0) #define SVM_FLAG_PRIVATE_PASID (1<<0)
/*
* The SVM_FLAG_SUPERVISOR_MODE flag requests a PASID which can be used only
* for access to kernel addresses. No IOTLB flushes are automatically done
* for kernel mappings; it is valid only for access to the kernel's static
* 1:1 mapping of physical memory — not to vmalloc or even module mappings.
* A future API addition may permit the use of such ranges, by means of an
* explicit IOTLB flush call (akin to the DMA API's unmap method).
*
* It is unlikely that we will ever hook into flush_tlb_kernel_range() to
* do such IOTLB flushes automatically.
*/
#define SVM_FLAG_SUPERVISOR_MODE (1<<1)
/** /**
* intel_svm_bind_mm() - Bind the current process to a PASID * intel_svm_bind_mm() - Bind the current process to a PASID
* @dev: Device to be granted access * @dev: Device to be granted access
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册