提交 c2dc4c07 编写于 作者: L Linus Torvalds

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull vhost fixes from Michael Tsirkin:
 "Fixes all over the place.

  A new UAPI is borderline: can also be considered a new feature but
  also seems to be the only way we could come up with to fix addressing
  for userspace - and it seems important to switch to it now before
  userspace making assumptions about addressing ability of devices is
  set in stone"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
  vdpasim: allow to assign a MAC address
  vdpasim: fix MAC address configuration
  vdpa: handle irq bypass register failure case
  vdpa_sim: Fix DMA mask
  Revert "vhost-vdpa: fix page pinning leakage in error path"
  vdpa/mlx5: Fix error return in map_direct_mr()
  vhost_vdpa: Return -EFAULT if copy_from_user() fails
  vdpa_sim: implement get_iova_range()
  vhost: vdpa: report iova range
  vdpa: introduce config op to get valid iova range
...@@ -239,7 +239,6 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr ...@@ -239,7 +239,6 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr
u64 paend; u64 paend;
struct scatterlist *sg; struct scatterlist *sg;
struct device *dma = mvdev->mdev->device; struct device *dma = mvdev->mdev->device;
int ret;
for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1); for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
map; map = vhost_iotlb_itree_next(map, start, mr->end - 1)) { map; map = vhost_iotlb_itree_next(map, start, mr->end - 1)) {
...@@ -277,8 +276,8 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr ...@@ -277,8 +276,8 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr
done: done:
mr->log_size = log_entity_size; mr->log_size = log_entity_size;
mr->nsg = nsg; mr->nsg = nsg;
ret = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0); err = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
if (!ret) if (!err)
goto err_map; goto err_map;
err = create_direct_mr(mvdev, mr); err = create_direct_mr(mvdev, mr);
......
...@@ -38,6 +38,10 @@ static int batch_mapping = 1; ...@@ -38,6 +38,10 @@ static int batch_mapping = 1;
module_param(batch_mapping, int, 0444); module_param(batch_mapping, int, 0444);
MODULE_PARM_DESC(batch_mapping, "Batched mapping 1 -Enable; 0 - Disable"); MODULE_PARM_DESC(batch_mapping, "Batched mapping 1 -Enable; 0 - Disable");
/*
 * Optional MAC address for the simulated device, supplied as a module
 * parameter string. When it is not set, vdpasim_create() falls back to a
 * randomly generated address.
 */
static char *macaddr;
module_param(macaddr, charp, 0);
MODULE_PARM_DESC(macaddr, "Ethernet MAC address");
struct vdpasim_virtqueue { struct vdpasim_virtqueue {
struct vringh vring; struct vringh vring;
struct vringh_kiov iov; struct vringh_kiov iov;
...@@ -60,7 +64,8 @@ struct vdpasim_virtqueue { ...@@ -60,7 +64,8 @@ struct vdpasim_virtqueue {
static u64 vdpasim_features = (1ULL << VIRTIO_F_ANY_LAYOUT) | static u64 vdpasim_features = (1ULL << VIRTIO_F_ANY_LAYOUT) |
(1ULL << VIRTIO_F_VERSION_1) | (1ULL << VIRTIO_F_VERSION_1) |
(1ULL << VIRTIO_F_ACCESS_PLATFORM); (1ULL << VIRTIO_F_ACCESS_PLATFORM) |
(1ULL << VIRTIO_NET_F_MAC);
/* State of each vdpasim device */ /* State of each vdpasim device */
struct vdpasim { struct vdpasim {
...@@ -361,7 +366,9 @@ static struct vdpasim *vdpasim_create(void) ...@@ -361,7 +366,9 @@ static struct vdpasim *vdpasim_create(void)
spin_lock_init(&vdpasim->iommu_lock); spin_lock_init(&vdpasim->iommu_lock);
dev = &vdpasim->vdpa.dev; dev = &vdpasim->vdpa.dev;
dev->coherent_dma_mask = DMA_BIT_MASK(64); dev->dma_mask = &dev->coherent_dma_mask;
if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)))
goto err_iommu;
set_dma_ops(dev, &vdpasim_dma_ops); set_dma_ops(dev, &vdpasim_dma_ops);
vdpasim->iommu = vhost_iotlb_alloc(2048, 0); vdpasim->iommu = vhost_iotlb_alloc(2048, 0);
...@@ -372,7 +379,15 @@ static struct vdpasim *vdpasim_create(void) ...@@ -372,7 +379,15 @@ static struct vdpasim *vdpasim_create(void)
if (!vdpasim->buffer) if (!vdpasim->buffer)
goto err_iommu; goto err_iommu;
eth_random_addr(vdpasim->config.mac); if (macaddr) {
mac_pton(macaddr, vdpasim->config.mac);
if (!is_valid_ether_addr(vdpasim->config.mac)) {
ret = -EADDRNOTAVAIL;
goto err_iommu;
}
} else {
eth_random_addr(vdpasim->config.mac);
}
vringh_set_iotlb(&vdpasim->vqs[0].vring, vdpasim->iommu); vringh_set_iotlb(&vdpasim->vqs[0].vring, vdpasim->iommu);
vringh_set_iotlb(&vdpasim->vqs[1].vring, vdpasim->iommu); vringh_set_iotlb(&vdpasim->vqs[1].vring, vdpasim->iommu);
...@@ -574,6 +589,16 @@ static u32 vdpasim_get_generation(struct vdpa_device *vdpa) ...@@ -574,6 +589,16 @@ static u32 vdpasim_get_generation(struct vdpa_device *vdpa)
return vdpasim->generation; return vdpasim->generation;
} }
static struct vdpa_iova_range vdpasim_get_iova_range(struct vdpa_device *vdpa)
{
struct vdpa_iova_range range = {
.first = 0ULL,
.last = ULLONG_MAX,
};
return range;
}
static int vdpasim_set_map(struct vdpa_device *vdpa, static int vdpasim_set_map(struct vdpa_device *vdpa,
struct vhost_iotlb *iotlb) struct vhost_iotlb *iotlb)
{ {
...@@ -657,6 +682,7 @@ static const struct vdpa_config_ops vdpasim_net_config_ops = { ...@@ -657,6 +682,7 @@ static const struct vdpa_config_ops vdpasim_net_config_ops = {
.get_config = vdpasim_get_config, .get_config = vdpasim_get_config,
.set_config = vdpasim_set_config, .set_config = vdpasim_set_config,
.get_generation = vdpasim_get_generation, .get_generation = vdpasim_get_generation,
.get_iova_range = vdpasim_get_iova_range,
.dma_map = vdpasim_dma_map, .dma_map = vdpasim_dma_map,
.dma_unmap = vdpasim_dma_unmap, .dma_unmap = vdpasim_dma_unmap,
.free = vdpasim_free, .free = vdpasim_free,
...@@ -683,6 +709,7 @@ static const struct vdpa_config_ops vdpasim_net_batch_config_ops = { ...@@ -683,6 +709,7 @@ static const struct vdpa_config_ops vdpasim_net_batch_config_ops = {
.get_config = vdpasim_get_config, .get_config = vdpasim_get_config,
.set_config = vdpasim_set_config, .set_config = vdpasim_set_config,
.get_generation = vdpasim_get_generation, .get_generation = vdpasim_get_generation,
.get_iova_range = vdpasim_get_iova_range,
.set_map = vdpasim_set_map, .set_map = vdpasim_set_map,
.free = vdpasim_free, .free = vdpasim_free,
}; };
......
...@@ -47,6 +47,7 @@ struct vhost_vdpa { ...@@ -47,6 +47,7 @@ struct vhost_vdpa {
int minor; int minor;
struct eventfd_ctx *config_ctx; struct eventfd_ctx *config_ctx;
int in_batch; int in_batch;
struct vdpa_iova_range range;
}; };
static DEFINE_IDA(vhost_vdpa_ida); static DEFINE_IDA(vhost_vdpa_ida);
...@@ -103,6 +104,9 @@ static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid) ...@@ -103,6 +104,9 @@ static void vhost_vdpa_setup_vq_irq(struct vhost_vdpa *v, u16 qid)
vq->call_ctx.producer.token = vq->call_ctx.ctx; vq->call_ctx.producer.token = vq->call_ctx.ctx;
vq->call_ctx.producer.irq = irq; vq->call_ctx.producer.irq = irq;
ret = irq_bypass_register_producer(&vq->call_ctx.producer); ret = irq_bypass_register_producer(&vq->call_ctx.producer);
if (unlikely(ret))
dev_info(&v->dev, "vq %u, irq bypass producer (token %p) registration fails, ret = %d\n",
qid, vq->call_ctx.producer.token, ret);
} }
static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid) static void vhost_vdpa_unsetup_vq_irq(struct vhost_vdpa *v, u16 qid)
...@@ -337,6 +341,16 @@ static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp) ...@@ -337,6 +341,16 @@ static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
return 0; return 0;
} }
/*
 * VHOST_VDPA_GET_IOVA_RANGE handler: copy the IOVA range cached in @v to
 * the userspace buffer at @argp.
 *
 * Returns 0 on success, or -EFAULT if the user buffer cannot be written.
 * copy_to_user() returns the number of bytes it could NOT copy; that count
 * must not be handed back to the caller as the ioctl result.
 */
static long vhost_vdpa_get_iova_range(struct vhost_vdpa *v, u32 __user *argp)
{
	struct vhost_vdpa_iova_range range = {
		.first = v->range.first,
		.last = v->range.last,
	};

	if (copy_to_user(argp, &range, sizeof(range)))
		return -EFAULT;

	return 0;
}
static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd, static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
void __user *argp) void __user *argp)
{ {
...@@ -421,12 +435,11 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, ...@@ -421,12 +435,11 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
void __user *argp = (void __user *)arg; void __user *argp = (void __user *)arg;
u64 __user *featurep = argp; u64 __user *featurep = argp;
u64 features; u64 features;
long r; long r = 0;
if (cmd == VHOST_SET_BACKEND_FEATURES) { if (cmd == VHOST_SET_BACKEND_FEATURES) {
r = copy_from_user(&features, featurep, sizeof(features)); if (copy_from_user(&features, featurep, sizeof(features)))
if (r) return -EFAULT;
return r;
if (features & ~VHOST_VDPA_BACKEND_FEATURES) if (features & ~VHOST_VDPA_BACKEND_FEATURES)
return -EOPNOTSUPP; return -EOPNOTSUPP;
vhost_set_backend_features(&v->vdev, features); vhost_set_backend_features(&v->vdev, features);
...@@ -469,7 +482,11 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep, ...@@ -469,7 +482,11 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
break; break;
case VHOST_GET_BACKEND_FEATURES: case VHOST_GET_BACKEND_FEATURES:
features = VHOST_VDPA_BACKEND_FEATURES; features = VHOST_VDPA_BACKEND_FEATURES;
r = copy_to_user(featurep, &features, sizeof(features)); if (copy_to_user(featurep, &features, sizeof(features)))
r = -EFAULT;
break;
case VHOST_VDPA_GET_IOVA_RANGE:
r = vhost_vdpa_get_iova_range(v, argp);
break; break;
default: default:
r = vhost_dev_ioctl(&v->vdev, cmd, argp); r = vhost_dev_ioctl(&v->vdev, cmd, argp);
...@@ -588,19 +605,25 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, ...@@ -588,19 +605,25 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
struct vhost_dev *dev = &v->vdev; struct vhost_dev *dev = &v->vdev;
struct vhost_iotlb *iotlb = dev->iotlb; struct vhost_iotlb *iotlb = dev->iotlb;
struct page **page_list; struct page **page_list;
struct vm_area_struct **vmas; unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
unsigned int gup_flags = FOLL_LONGTERM; unsigned int gup_flags = FOLL_LONGTERM;
unsigned long map_pfn, last_pfn = 0; unsigned long npages, cur_base, map_pfn, last_pfn = 0;
unsigned long npages, lock_limit; unsigned long locked, lock_limit, pinned, i;
unsigned long i, nmap = 0;
u64 iova = msg->iova; u64 iova = msg->iova;
long pinned;
int ret = 0; int ret = 0;
if (msg->iova < v->range.first ||
msg->iova + msg->size - 1 > v->range.last)
return -EINVAL;
if (vhost_iotlb_itree_first(iotlb, msg->iova, if (vhost_iotlb_itree_first(iotlb, msg->iova,
msg->iova + msg->size - 1)) msg->iova + msg->size - 1))
return -EEXIST; return -EEXIST;
page_list = (struct page **) __get_free_page(GFP_KERNEL);
if (!page_list)
return -ENOMEM;
if (msg->perm & VHOST_ACCESS_WO) if (msg->perm & VHOST_ACCESS_WO)
gup_flags |= FOLL_WRITE; gup_flags |= FOLL_WRITE;
...@@ -608,86 +631,61 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v, ...@@ -608,86 +631,61 @@ static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
if (!npages) if (!npages)
return -EINVAL; return -EINVAL;
page_list = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
vmas = kvmalloc_array(npages, sizeof(struct vm_area_struct *),
GFP_KERNEL);
if (!page_list || !vmas) {
ret = -ENOMEM;
goto free;
}
mmap_read_lock(dev->mm); mmap_read_lock(dev->mm);
locked = atomic64_add_return(npages, &dev->mm->pinned_vm);
lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
ret = -ENOMEM;
goto unlock;
}
pinned = pin_user_pages(msg->uaddr & PAGE_MASK, npages, gup_flags, if (locked > lock_limit) {
page_list, vmas); ret = -ENOMEM;
if (npages != pinned) { goto out;
if (pinned < 0) {
ret = pinned;
} else {
unpin_user_pages(page_list, pinned);
ret = -ENOMEM;
}
goto unlock;
} }
cur_base = msg->uaddr & PAGE_MASK;
iova &= PAGE_MASK; iova &= PAGE_MASK;
map_pfn = page_to_pfn(page_list[0]);
while (npages) {
/* One more iteration to avoid extra vdpa_map() call out of loop. */ pinned = min_t(unsigned long, npages, list_size);
for (i = 0; i <= npages; i++) { ret = pin_user_pages(cur_base, pinned,
unsigned long this_pfn; gup_flags, page_list, NULL);
u64 csize; if (ret != pinned)
goto out;
/* The last chunk may have no valid PFN next to it */
this_pfn = i < npages ? page_to_pfn(page_list[i]) : -1UL; if (!last_pfn)
map_pfn = page_to_pfn(page_list[0]);
if (last_pfn && (this_pfn == -1UL ||
this_pfn != last_pfn + 1)) { for (i = 0; i < ret; i++) {
/* Pin a contiguous chunk of memory */ unsigned long this_pfn = page_to_pfn(page_list[i]);
csize = last_pfn - map_pfn + 1; u64 csize;
ret = vhost_vdpa_map(v, iova, csize << PAGE_SHIFT,
map_pfn << PAGE_SHIFT, if (last_pfn && (this_pfn != last_pfn + 1)) {
msg->perm); /* Pin a contiguous chunk of memory */
if (ret) { csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
/* if (vhost_vdpa_map(v, iova, csize,
* Unpin the rest chunks of memory on the map_pfn << PAGE_SHIFT,
* flight with no corresponding vdpa_map() msg->perm))
* calls having been made yet. On the other goto out;
* hand, vdpa_unmap() in the failure path map_pfn = this_pfn;
* is in charge of accounting the number of iova += csize;
* pinned pages for its own.
* This asymmetrical pattern of accounting
* is for efficiency to pin all pages at
* once, while there is no other callsite
* of vdpa_map() than here above.
*/
unpin_user_pages(&page_list[nmap],
npages - nmap);
goto out;
} }
atomic64_add(csize, &dev->mm->pinned_vm);
nmap += csize; last_pfn = this_pfn;
iova += csize << PAGE_SHIFT;
map_pfn = this_pfn;
} }
last_pfn = this_pfn;
cur_base += ret << PAGE_SHIFT;
npages -= ret;
} }
WARN_ON(nmap != npages); /* Pin the rest chunk */
ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT,
map_pfn << PAGE_SHIFT, msg->perm);
out: out:
if (ret) if (ret) {
vhost_vdpa_unmap(v, msg->iova, msg->size); vhost_vdpa_unmap(v, msg->iova, msg->size);
unlock: atomic64_sub(npages, &dev->mm->pinned_vm);
}
mmap_read_unlock(dev->mm); mmap_read_unlock(dev->mm);
free: free_page((unsigned long)page_list);
kvfree(vmas);
kvfree(page_list);
return ret; return ret;
} }
...@@ -783,6 +781,27 @@ static void vhost_vdpa_free_domain(struct vhost_vdpa *v) ...@@ -783,6 +781,27 @@ static void vhost_vdpa_free_domain(struct vhost_vdpa *v)
v->domain = NULL; v->domain = NULL;
} }
/*
 * Fill in v->range with the IOVA range usable through this vhost-vDPA
 * instance. Sources are tried in order:
 *   1. the device's own get_iova_range() config op, when it provides one;
 *   2. the geometry of the attached IOMMU domain, when it can be queried
 *      and reports force_aperture;
 *   3. otherwise the full range 0..ULLONG_MAX.
 */
static void vhost_vdpa_set_iova_range(struct vhost_vdpa *v)
{
	struct vdpa_device *vdpa = v->vdpa;
	const struct vdpa_config_ops *ops = vdpa->config;
	struct vdpa_iova_range *range = &v->range;
	struct iommu_domain_geometry geo;

	/* The device knows best: let it report its own range. */
	if (ops->get_iova_range) {
		*range = ops->get_iova_range(vdpa);
		return;
	}

	/* Otherwise use the IOMMU aperture, if there is an enforced one. */
	if (v->domain &&
	    !iommu_domain_get_attr(v->domain, DOMAIN_ATTR_GEOMETRY, &geo) &&
	    geo.force_aperture) {
		range->first = geo.aperture_start;
		range->last = geo.aperture_end;
		return;
	}

	/* No restriction known: everything is mappable. */
	range->first = 0;
	range->last = ULLONG_MAX;
}
static int vhost_vdpa_open(struct inode *inode, struct file *filep) static int vhost_vdpa_open(struct inode *inode, struct file *filep)
{ {
struct vhost_vdpa *v; struct vhost_vdpa *v;
...@@ -823,6 +842,8 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep) ...@@ -823,6 +842,8 @@ static int vhost_vdpa_open(struct inode *inode, struct file *filep)
if (r) if (r)
goto err_init_iotlb; goto err_init_iotlb;
vhost_vdpa_set_iova_range(v);
filep->private_data = v; filep->private_data = v;
return 0; return 0;
......
...@@ -52,6 +52,16 @@ struct vdpa_device { ...@@ -52,6 +52,16 @@ struct vdpa_device {
int nvqs; int nvqs;
}; };
/**
 * struct vdpa_iova_range - the IOVA range supported by the device
 * @first: start of the IOVA range
 * @last: end of the IOVA range (inclusive, i.e. the last usable address)
 */
struct vdpa_iova_range {
u64 first;
u64 last;
};
/** /**
* vDPA_config_ops - operations for configuring a vDPA device. * vDPA_config_ops - operations for configuring a vDPA device.
* Note: vDPA device drivers are required to implement all of the * Note: vDPA device drivers are required to implement all of the
...@@ -151,6 +161,10 @@ struct vdpa_device { ...@@ -151,6 +161,10 @@ struct vdpa_device {
* @get_generation: Get device config generation (optional) * @get_generation: Get device config generation (optional)
* @vdev: vdpa device * @vdev: vdpa device
* Returns u32: device generation * Returns u32: device generation
* @get_iova_range: Get supported iova range (optional)
* @vdev: vdpa device
* Returns the iova range supported by
* the device.
* @set_map: Set device memory mapping (optional) * @set_map: Set device memory mapping (optional)
* Needed for device that using device * Needed for device that using device
* specific DMA translation (on-chip IOMMU) * specific DMA translation (on-chip IOMMU)
...@@ -216,6 +230,7 @@ struct vdpa_config_ops { ...@@ -216,6 +230,7 @@ struct vdpa_config_ops {
void (*set_config)(struct vdpa_device *vdev, unsigned int offset, void (*set_config)(struct vdpa_device *vdev, unsigned int offset,
const void *buf, unsigned int len); const void *buf, unsigned int len);
u32 (*get_generation)(struct vdpa_device *vdev); u32 (*get_generation)(struct vdpa_device *vdev);
struct vdpa_iova_range (*get_iova_range)(struct vdpa_device *vdev);
/* DMA ops */ /* DMA ops */
int (*set_map)(struct vdpa_device *vdev, struct vhost_iotlb *iotlb); int (*set_map)(struct vdpa_device *vdev, struct vhost_iotlb *iotlb);
......
...@@ -146,4 +146,8 @@ ...@@ -146,4 +146,8 @@
/* Set event fd for config interrupt*/ /* Set event fd for config interrupt*/
#define VHOST_VDPA_SET_CONFIG_CALL _IOW(VHOST_VIRTIO, 0x77, int) #define VHOST_VDPA_SET_CONFIG_CALL _IOW(VHOST_VIRTIO, 0x77, int)
/* Get the valid IOVA range; the result is returned in a
 * struct vhost_vdpa_iova_range.
 */
#define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \
struct vhost_vdpa_iova_range)
#endif #endif
...@@ -138,6 +138,15 @@ struct vhost_vdpa_config { ...@@ -138,6 +138,15 @@ struct vhost_vdpa_config {
__u8 buf[0]; __u8 buf[0];
}; };
/* vhost vdpa IOVA range, as reported by VHOST_VDPA_GET_IOVA_RANGE
 * @first: First address that can be mapped by vhost-vDPA
 * @last: Last address that can be mapped by vhost-vDPA (inclusive)
 */
struct vhost_vdpa_iova_range {
__u64 first;
__u64 last;
};
/* Feature bits */ /* Feature bits */
/* Log all write descriptors. Can be changed while device is active. */ /* Log all write descriptors. Can be changed while device is active. */
#define VHOST_F_LOG_ALL 26 #define VHOST_F_LOG_ALL 26
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册