diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 085e0e9c2104c45ae385097d6deaadd1b6178020..75381c800e3fe7231595a4854317412592ad22d2 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -42,6 +42,7 @@
 #include <linux/notifier.h>
 #include <linux/dma-iommu.h>
 #include <linux/irqdomain.h>
+#include <linux/ktask.h>
 
 #define DRIVER_VERSION  "0.2"
 #define DRIVER_AUTHOR   "Alex Williamson <alex.williamson@redhat.com>"
@@ -411,7 +412,7 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
  */
 static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
 				  long npage, unsigned long *pfn_base,
-				  unsigned long limit)
+				  unsigned long limit, struct mm_struct *mm)
 {
 	unsigned long pfn = 0;
 	long ret, pinned = 0, lock_acct = 0;
@@ -419,10 +420,10 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
 	dma_addr_t iova = vaddr - dma->vaddr + dma->iova;
 
 	/* This code path is only user initiated */
-	if (!current->mm)
+	if (!mm)
 		return -ENODEV;
 
-	ret = vaddr_get_pfn(current->mm, vaddr, dma->prot, pfn_base);
+	ret = vaddr_get_pfn(mm, vaddr, dma->prot, pfn_base);
 	if (ret)
 		return ret;
 
@@ -434,7 +435,7 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
 	 * pages are already counted against the user.
 	 */
 	if (!rsvd && !vfio_find_vpfn(dma, iova)) {
-		if (!dma->lock_cap && current->mm->locked_vm + 1 > limit) {
+		if (!dma->lock_cap && mm->locked_vm + 1 > limit) {
 			put_pfn(*pfn_base, dma->prot);
 			pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__,
 				limit << PAGE_SHIFT);
@@ -449,7 +450,7 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
 	/* Lock all the consecutive pages from pfn_base */
 	for (vaddr += PAGE_SIZE, iova += PAGE_SIZE; pinned < npage;
 	     pinned++, vaddr += PAGE_SIZE, iova += PAGE_SIZE) {
-		ret = vaddr_get_pfn(current->mm, vaddr, dma->prot, &pfn);
+		ret = vaddr_get_pfn(mm, vaddr, dma->prot, &pfn);
 		if (ret)
 			break;
 
@@ -461,7 +462,7 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
 
 		if (!rsvd && !vfio_find_vpfn(dma, iova)) {
 			if (!dma->lock_cap &&
-			    current->mm->locked_vm + lock_acct + 1 > limit) {
+			    mm->locked_vm + lock_acct + 1 > limit) {
 				put_pfn(pfn, dma->prot);
 				pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
 					__func__, limit << PAGE_SHIFT);
@@ -768,15 +769,15 @@ static size_t unmap_unpin_slow(struct vfio_domain *domain,
 }
 
 static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
+			     dma_addr_t iova, dma_addr_t end,
 			     bool do_accounting)
 {
-	dma_addr_t iova = dma->iova, end = dma->iova + dma->size;
 	struct vfio_domain *domain, *d;
 	LIST_HEAD(unmapped_region_list);
 	int unmapped_region_cnt = 0;
 	long unlocked = 0;
 
-	if (!dma->size)
+	if (iova == end)
 		return 0;
 
 	if (!IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu))
@@ -793,7 +794,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
 					      struct vfio_domain, next);
 
 	list_for_each_entry_continue(d, &iommu->domain_list, next) {
-		iommu_unmap(d->domain, dma->iova, dma->size);
+		iommu_unmap(d->domain, iova, end - iova);
 		cond_resched();
 	}
 
@@ -834,8 +835,6 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
 		}
 	}
 
-	dma->iommu_mapped = false;
-
 	if (unmapped_region_cnt)
 		unlocked += vfio_sync_unpin(dma, domain,
 					    &unmapped_region_list);
@@ -846,15 +845,22 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
 	return unlocked;
 }
 
-static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma)
+static void vfio_remove_dma_finish(struct vfio_iommu *iommu,
+				   struct vfio_dma *dma)
 {
-	vfio_unmap_unpin(iommu, dma, true);
+	dma->iommu_mapped = false;
 	vfio_unlink_dma(iommu, dma);
 	put_task_struct(dma->task);
 	kfree(dma);
 	iommu->dma_avail++;
 }
 
+static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma)
+{
+	vfio_unmap_unpin(iommu, dma, dma->iova, dma->iova + dma->size, true);
+	vfio_remove_dma_finish(iommu, dma);
+}
+
 static unsigned long vfio_pgsize_bitmap(struct vfio_iommu *iommu)
 {
 	struct vfio_domain *domain;
@@ -1048,20 +1054,29 @@ static int vfio_iommu_map(struct vfio_iommu *iommu, dma_addr_t iova,
 	return ret;
 }
 
-static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma,
-			    size_t map_size)
+struct vfio_pin_args {
+	struct vfio_iommu *iommu;
+	struct vfio_dma *dma;
+	unsigned long limit;
+	struct mm_struct *mm;
+};
+
+static int vfio_pin_map_dma_chunk(unsigned long start_vaddr,
+				  unsigned long end_vaddr,
+				  struct vfio_pin_args *args)
 {
-	dma_addr_t iova = dma->iova;
-	unsigned long vaddr = dma->vaddr;
-	size_t size = map_size;
+	struct vfio_dma *dma = args->dma;
+	dma_addr_t iova = dma->iova + (start_vaddr - dma->vaddr);
+	unsigned long unmapped_size = end_vaddr - start_vaddr;
+	unsigned long pfn, mapped_size = 0;
 	long npage;
-	unsigned long pfn, limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
 	int ret = 0;
 
-	while (size) {
+	while (unmapped_size) {
 		/* Pin a contiguous chunk of memory */
-		npage = vfio_pin_pages_remote(dma, vaddr + dma->size,
-					      size >> PAGE_SHIFT, &pfn, limit);
+		npage = vfio_pin_pages_remote(dma, start_vaddr + mapped_size,
+					      unmapped_size >> PAGE_SHIFT,
+					      &pfn, args->limit, args->mm);
 		if (npage <= 0) {
 			WARN_ON(!npage);
 			ret = (int)npage;
@@ -1069,22 +1084,50 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma,
 		}
 
 		/* Map it! */
-		ret = vfio_iommu_map(iommu, iova + dma->size, pfn, npage,
-				     dma->prot);
+		ret = vfio_iommu_map(args->iommu, iova + mapped_size, pfn,
+				     npage, dma->prot);
 		if (ret) {
-			vfio_unpin_pages_remote(dma, iova + dma->size, pfn,
+			vfio_unpin_pages_remote(dma, iova + mapped_size, pfn,
 						npage, true);
 			break;
 		}
 
-		size -= npage << PAGE_SHIFT;
-		dma->size += npage << PAGE_SHIFT;
+		unmapped_size -= npage << PAGE_SHIFT;
+		mapped_size += npage << PAGE_SHIFT;
 	}
 
+	return (ret == 0) ? KTASK_RETURN_SUCCESS : ret;
+}
+
+static void vfio_pin_map_dma_undo(unsigned long start_vaddr,
+				  unsigned long end_vaddr,
+				  struct vfio_pin_args *args)
+{
+	struct vfio_dma *dma = args->dma;
+	dma_addr_t iova = dma->iova + (start_vaddr - dma->vaddr);
+	dma_addr_t end = dma->iova + (end_vaddr - dma->vaddr);
+
+	vfio_unmap_unpin(args->iommu, args->dma, iova, end, true);
+}
+
+static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma,
+			    size_t map_size)
+{
+	unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+	int ret = 0;
+	struct vfio_pin_args args = { iommu, dma, limit, current->mm };
+	/* Stay on PMD boundary in case THP is being used. */
+	DEFINE_KTASK_CTL(ctl, vfio_pin_map_dma_chunk, &args, PMD_SIZE);
+
+	ktask_ctl_set_undo_func(&ctl, vfio_pin_map_dma_undo);
+	ret = ktask_run((void *)dma->vaddr, map_size, &ctl);
+
+	dma->iommu_mapped = true;
+
 	if (ret)
-		vfio_remove_dma(iommu, dma);
+		vfio_remove_dma_finish(iommu, dma);
+	else
+		dma->size += map_size;
 
 	return ret;
 }
@@ -1252,7 +1295,8 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
 
 				npage = vfio_pin_pages_remote(dma, vaddr,
 							      n >> PAGE_SHIFT,
-							      &pfn, limit);
+							      &pfn, limit,
+							      current->mm);
 				if (npage <= 0) {
 					WARN_ON(!npage);
 					ret = (int)npage;
@@ -1708,7 +1752,9 @@ static void vfio_iommu_unmap_unpin_reaccount(struct vfio_iommu *iommu)
 		long locked = 0, unlocked = 0;
 
 		dma = rb_entry(n, struct vfio_dma, node);
-		unlocked += vfio_unmap_unpin(iommu, dma, false);
+		unlocked += vfio_unmap_unpin(iommu, dma, dma->iova,
+					     dma->iova + dma->size, false);
+		dma->iommu_mapped = false;
 		p = rb_first(&dma->pfn_list);
 		for (; p; p = rb_next(p)) {
 			struct vfio_pfn *vpfn = rb_entry(p, struct vfio_pfn,
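For reference, the ktask pattern the patch applies (split a range into chunks handed to helper threads, with an undo callback for rollback if any chunk fails) can be seen in isolation in the sketch below. This is a minimal illustration assuming the <linux/ktask.h> API added earlier in this series; the task itself (zeroing a buffer) and the names zero_chunk, zero_undo, and zero_buffer are hypothetical, while DEFINE_KTASK_CTL, ktask_ctl_set_undo_func, ktask_run, and KTASK_RETURN_SUCCESS are the interfaces the patch above uses.

#include <linux/ktask.h>
#include <linux/mm.h>		/* PMD_SIZE */
#include <linux/string.h>

struct zero_args {
	unsigned long base;	/* state shared by all chunks of the task */
};

/* Thread function: each helper thread is handed [start, end) chunks. */
static int zero_chunk(unsigned long start, unsigned long end,
		      struct zero_args *args)
{
	memset((void *)start, 0, end - start);
	return KTASK_RETURN_SUCCESS;
}

/*
 * Undo function: run over chunks that completed successfully if a later
 * chunk fails.  Zeroing needs no rollback; the vfio patch unmaps and
 * unpins the corresponding iova range here instead.
 */
static void zero_undo(unsigned long start, unsigned long end,
		      struct zero_args *args)
{
}

static int zero_buffer(void *buf, size_t size)
{
	struct zero_args args = { (unsigned long)buf };
	/* Chunks of at least PMD_SIZE, mirroring the patch above. */
	DEFINE_KTASK_CTL(ctl, zero_chunk, &args, PMD_SIZE);

	ktask_ctl_set_undo_func(&ctl, zero_undo);
	return ktask_run(buf, size, &ctl);
}

The undo callback is what lets the new vfio_pin_map_dma() drop its per-page error handling: if any chunk fails, ktask invokes vfio_pin_map_dma_undo() over the chunks that already succeeded, so the caller only has to finish tearing down the vfio_dma and can defer the dma->size accounting until ktask_run() returns.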