/******************************************************************************
 * gntdev.c
 *
 * Device for accessing (in user-space) pages that have been granted by other
 * domains.
 *
 * Copyright (c) 2006-2007, D G Murray.
 *           (c) 2009 Gerd Hoffmann <kraxel@redhat.com>
 *           (c) 2018 Oleksandr Andrushchenko, EPAM Systems Inc.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#undef DEBUG

#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt

#include <linux/dma-mapping.h>
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/miscdevice.h>
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/refcount.h>

#include <xen/xen.h>
#include <xen/grant_table.h>
#include <xen/balloon.h>
#include <xen/gntdev.h>
#include <xen/events.h>
#include <xen/page.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include "gntdev-common.h"
#ifdef CONFIG_XEN_GNTDEV_DMABUF
#include "gntdev-dmabuf.h"
#endif

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Derek G. Murray <Derek.Murray@cl.cam.ac.uk>, "
	      "Gerd Hoffmann <kraxel@redhat.com>");
MODULE_DESCRIPTION("User-space granted page access driver");

static unsigned int limit = 64*1024;
module_param(limit, uint, 0644);
MODULE_PARM_DESC(limit,
	"Maximum number of grants that may be mapped by one mapping request");

static int use_ptemod;

static int unmap_grant_pages(struct gntdev_grant_map *map,
			     int offset, int pages);

static struct miscdevice gntdev_miscdev;

/* ------------------------------------------------------------------ */

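/* Return true if @count is invalid: zero, or above the "limit" module parameter. */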
bool gntdev_test_page_count(unsigned int count)
{
	return !count || count > limit;
}

static void gntdev_print_maps(struct gntdev_priv *priv,
			      char *text, int text_index)
{
#ifdef DEBUG
	struct gntdev_grant_map *map;

	pr_debug("%s: maps list (priv %p)\n", __func__, priv);
	list_for_each_entry(map, &priv->maps, next)
		pr_debug("  index %2d, count %2d %s\n",
		       map->index, map->count,
		       map->index == text_index && text ? text : "");
#endif
}

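/*
 * Free the pages backing @map (DMA-allocated or obtained via
 * gnttab_alloc_pages()) together with all of its bookkeeping arrays.
 */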
static void gntdev_free_map(struct gntdev_grant_map *map)
{
	if (map == NULL)
		return;

#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
	if (map->dma_vaddr) {
		struct gnttab_dma_alloc_args args;

		args.dev = map->dma_dev;
		args.coherent = !!(map->dma_flags & GNTDEV_DMA_FLAG_COHERENT);
		args.nr_pages = map->count;
		args.pages = map->pages;
		args.frames = map->frames;
		args.vaddr = map->dma_vaddr;
		args.dev_bus_addr = map->dma_bus_addr;

		gnttab_dma_free_pages(&args);
	} else
#endif
	if (map->pages)
		gnttab_free_pages(map->count, map->pages);

#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
	kfree(map->frames);
#endif
	kfree(map->pages);
	kfree(map->grants);
	kfree(map->map_ops);
	kfree(map->unmap_ops);
	kfree(map->kmap_ops);
	kfree(map->kunmap_ops);
	kfree(map);
}

struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count,
					  int dma_flags)
{
	struct gntdev_grant_map *add;
	int i;

	add = kzalloc(sizeof(*add), GFP_KERNEL);
	if (NULL == add)
		return NULL;

	add->grants    = kcalloc(count, sizeof(add->grants[0]), GFP_KERNEL);
	add->map_ops   = kcalloc(count, sizeof(add->map_ops[0]), GFP_KERNEL);
	add->unmap_ops = kcalloc(count, sizeof(add->unmap_ops[0]), GFP_KERNEL);
	add->kmap_ops  = kcalloc(count, sizeof(add->kmap_ops[0]), GFP_KERNEL);
	add->kunmap_ops = kcalloc(count, sizeof(add->kunmap_ops[0]), GFP_KERNEL);
	add->pages     = kcalloc(count, sizeof(add->pages[0]), GFP_KERNEL);
	if (NULL == add->grants    ||
	    NULL == add->map_ops   ||
	    NULL == add->unmap_ops ||
	    NULL == add->kmap_ops  ||
	    NULL == add->kunmap_ops ||
	    NULL == add->pages)
		goto err;

#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
	add->dma_flags = dma_flags;

	/*
	 * Check if this mapping is requested to be backed
	 * by a DMA buffer.
	 */
	if (dma_flags & (GNTDEV_DMA_FLAG_WC | GNTDEV_DMA_FLAG_COHERENT)) {
		struct gnttab_dma_alloc_args args;

		add->frames = kcalloc(count, sizeof(add->frames[0]),
				      GFP_KERNEL);
		if (!add->frames)
			goto err;

		/* Remember the device, so we can free DMA memory. */
		add->dma_dev = priv->dma_dev;

		args.dev = priv->dma_dev;
		args.coherent = !!(dma_flags & GNTDEV_DMA_FLAG_COHERENT);
		args.nr_pages = count;
		args.pages = add->pages;
		args.frames = add->frames;

		if (gnttab_dma_alloc_pages(&args))
			goto err;

		add->dma_vaddr = args.vaddr;
		add->dma_bus_addr = args.dev_bus_addr;
	} else
#endif
	if (gnttab_alloc_pages(count, add->pages))
		goto err;

	for (i = 0; i < count; i++) {
		add->map_ops[i].handle = -1;
		add->unmap_ops[i].handle = -1;
		add->kmap_ops[i].handle = -1;
		add->kunmap_ops[i].handle = -1;
	}

	add->index = 0;
	add->count = count;
	refcount_set(&add->users, 1);

	return add;

err:
	gntdev_free_map(add);
	return NULL;
}

void gntdev_add_map(struct gntdev_priv *priv, struct gntdev_grant_map *add)
{
	struct gntdev_grant_map *map;

	list_for_each_entry(map, &priv->maps, next) {
		if (add->index + add->count < map->index) {
			list_add_tail(&add->next, &map->next);
			goto done;
		}
		add->index = map->index + map->count;
	}
	list_add_tail(&add->next, &priv->maps);

done:
	gntdev_print_maps(priv, "[new]", add->index);
}

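/*
 * Find the mapping that starts exactly at @index and, if @count is non-zero,
 * also spans exactly @count grants.  Returns NULL if there is no such map.
 */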
static struct gntdev_grant_map *gntdev_find_map_index(struct gntdev_priv *priv,
						      int index, int count)
{
	struct gntdev_grant_map *map;

	list_for_each_entry(map, &priv->maps, next) {
		if (map->index != index)
			continue;
		if (count && map->count != count)
			continue;
		return map;
	}
	return NULL;
}

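/*
 * Drop one reference to @map.  On the final put, deliver the requested unmap
 * notification (if any), unmap the grants in the non-ptemod case and free
 * the map.
 */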
void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
{
	if (!map)
		return;

	if (!refcount_dec_and_test(&map->users))
		return;

	if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
		notify_remote_via_evtchn(map->notify.event);
		evtchn_put(map->notify.event);
	}

	if (map->pages && !use_ptemod)
		unmap_grant_pages(map, 0, map->count);
	gntdev_free_map(map);
}

/* ------------------------------------------------------------------ */

static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
{
	struct gntdev_grant_map *map = data;
	unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT;
	int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte;
	u64 pte_maddr;

	BUG_ON(pgnr >= map->count);
	pte_maddr = arbitrary_virt_to_machine(pte).maddr;

	/*
	 * Set the PTE as special to force get_user_pages_fast() to fall
	 * back to the slow path.  If this is not supported as part of
	 * the grant map, it will be done afterwards.
	 */
	if (xen_feature(XENFEAT_gnttab_map_avail_bits))
		flags |= (1 << _GNTMAP_guest_avail0);

	gnttab_set_map_op(&map->map_ops[pgnr], pte_maddr, flags,
			  map->grants[pgnr].ref,
			  map->grants[pgnr].domid);
	gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, flags,
			    -1 /* handle */);
	return 0;
}

#ifdef CONFIG_X86
static int set_grant_ptes_as_special(pte_t *pte, unsigned long addr, void *data)
{
	set_pte_at(current->mm, addr, pte, pte_mkspecial(*pte));
	return 0;
}
#endif

int gntdev_map_grant_pages(struct gntdev_grant_map *map)
{
	int i, err = 0;

	if (!use_ptemod) {
		/* Note: it could already be mapped */
		if (map->map_ops[0].handle != -1)
			return 0;
		for (i = 0; i < map->count; i++) {
			unsigned long addr = (unsigned long)
				pfn_to_kaddr(page_to_pfn(map->pages[i]));
			gnttab_set_map_op(&map->map_ops[i], addr, map->flags,
				map->grants[i].ref,
				map->grants[i].domid);
			gnttab_set_unmap_op(&map->unmap_ops[i], addr,
				map->flags, -1 /* handle */);
		}
	} else {
		/*
		 * Set up the map_ops corresponding to the pte entries pointing
		 * to the kernel linear addresses of the struct pages.
		 * These ptes are completely different from the user ptes dealt
		 * with by find_grant_ptes.
		 */
		for (i = 0; i < map->count; i++) {
			unsigned long address = (unsigned long)
				pfn_to_kaddr(page_to_pfn(map->pages[i]));
			BUG_ON(PageHighMem(map->pages[i]));

			gnttab_set_map_op(&map->kmap_ops[i], address,
				map->flags | GNTMAP_host_map,
				map->grants[i].ref,
				map->grants[i].domid);
			gnttab_set_unmap_op(&map->kunmap_ops[i], address,
				map->flags | GNTMAP_host_map, -1);
		}
	}

	pr_debug("map %d+%d\n", map->index, map->count);
	err = gnttab_map_refs(map->map_ops, use_ptemod ? map->kmap_ops : NULL,
			map->pages, map->count);
	if (err)
		return err;

	for (i = 0; i < map->count; i++) {
		if (map->map_ops[i].status) {
			err = -EINVAL;
			continue;
		}

		map->unmap_ops[i].handle = map->map_ops[i].handle;
		if (use_ptemod)
			map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
		else if (map->dma_vaddr) {
			unsigned long bfn;

			bfn = pfn_to_bfn(page_to_pfn(map->pages[i]));
			map->unmap_ops[i].dev_bus_addr = __pfn_to_phys(bfn);
		}
#endif
	}
	return err;
}

static int __unmap_grant_pages(struct gntdev_grant_map *map, int offset,
			       int pages)
{
	int i, err = 0;
	struct gntab_unmap_queue_data unmap_data;

	if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
		int pgno = (map->notify.addr >> PAGE_SHIFT);
		if (pgno >= offset && pgno < offset + pages) {
			/* No need for kmap, pages are in lowmem */
			uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno]));
			tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
			map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
		}
	}

	unmap_data.unmap_ops = map->unmap_ops + offset;
	unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
	unmap_data.pages = map->pages + offset;
	unmap_data.count = pages;

	err = gnttab_unmap_refs_sync(&unmap_data);
	if (err)
		return err;

	for (i = 0; i < pages; i++) {
		if (map->unmap_ops[offset+i].status)
			err = -EINVAL;
		pr_debug("unmap handle=%d st=%d\n",
			map->unmap_ops[offset+i].handle,
			map->unmap_ops[offset+i].status);
		map->unmap_ops[offset+i].handle = -1;
	}
	return err;
}

static int unmap_grant_pages(struct gntdev_grant_map *map, int offset,
			     int pages)
{
	int range, err = 0;

	pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages);

	/* It is possible the requested range will have a "hole" where we
	 * already unmapped some of the grants. Only unmap valid ranges.
	 */
	while (pages && !err) {
		while (pages && map->unmap_ops[offset].handle == -1) {
			offset++;
			pages--;
		}
		range = 0;
		while (range < pages) {
			if (map->unmap_ops[offset+range].handle == -1)
				break;
			range++;
		}
		err = __unmap_grant_pages(map, offset, range);
		offset += range;
		pages -= range;
	}

	return err;
}

/* ------------------------------------------------------------------ */

static void gntdev_vma_open(struct vm_area_struct *vma)
{
	struct gntdev_grant_map *map = vma->vm_private_data;

	pr_debug("gntdev_vma_open %p\n", vma);
	refcount_inc(&map->users);
}

static void gntdev_vma_close(struct vm_area_struct *vma)
{
	struct gntdev_grant_map *map = vma->vm_private_data;
	struct file *file = vma->vm_file;
	struct gntdev_priv *priv = file->private_data;

	pr_debug("gntdev_vma_close %p\n", vma);
	if (use_ptemod) {
		WARN_ON(map->vma != vma);
		mmu_interval_notifier_remove(&map->notifier);
		map->vma = NULL;
	}
	vma->vm_private_data = NULL;
	gntdev_put_map(priv, map);
}

static struct page *gntdev_vma_find_special_page(struct vm_area_struct *vma,
						 unsigned long addr)
{
	struct gntdev_grant_map *map = vma->vm_private_data;

	return map->pages[(addr - map->pages_vm_start) >> PAGE_SHIFT];
}

static const struct vm_operations_struct gntdev_vmops = {
	.open = gntdev_vma_open,
	.close = gntdev_vma_close,
	.find_special_page = gntdev_vma_find_special_page,
};

/* ------------------------------------------------------------------ */

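/*
 * mmu interval notifier callback: the user address range backing the map is
 * being invalidated, so unmap the affected grants before the pages go away.
 */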
static bool gntdev_invalidate(struct mmu_interval_notifier *mn,
			      const struct mmu_notifier_range *range,
			      unsigned long cur_seq)
{
	struct gntdev_grant_map *map =
		container_of(mn, struct gntdev_grant_map, notifier);
	unsigned long mstart, mend;
	int err;

	if (!mmu_notifier_range_blockable(range))
		return false;

	/*
	 * If the VMA is split or otherwise changed the notifier is not
	 * updated, but we don't want to process VA's outside the modified
	 * VMA. FIXME: It would be much more understandable to just prevent
	 * modifying the VMA in the first place.
	 */
	if (map->vma->vm_start >= range->end ||
	    map->vma->vm_end <= range->start)
		return true;

	mstart = max(range->start, map->vma->vm_start);
	mend = min(range->end, map->vma->vm_end);
	pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n",
			map->index, map->count,
			map->vma->vm_start, map->vma->vm_end,
			range->start, range->end, mstart, mend);
	err = unmap_grant_pages(map,
				(mstart - map->vma->vm_start) >> PAGE_SHIFT,
				(mend - mstart) >> PAGE_SHIFT);
	WARN_ON(err);

	return true;
}

static const struct mmu_interval_notifier_ops gntdev_mmu_ops = {
	.invalidate = gntdev_invalidate,
};

/* ------------------------------------------------------------------ */

static int gntdev_open(struct inode *inode, struct file *flip)
{
	struct gntdev_priv *priv;

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	INIT_LIST_HEAD(&priv->maps);
	mutex_init(&priv->lock);

#ifdef CONFIG_XEN_GNTDEV_DMABUF
	priv->dmabuf_priv = gntdev_dmabuf_init(flip);
	if (IS_ERR(priv->dmabuf_priv)) {
		int ret = PTR_ERR(priv->dmabuf_priv);

		kfree(priv);
		return ret;
	}
#endif

	flip->private_data = priv;
#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
	priv->dma_dev = gntdev_miscdev.this_device;
	dma_coerce_mask_and_coherent(priv->dma_dev, DMA_BIT_MASK(64));
#endif
	pr_debug("priv %p\n", priv);

	return 0;
}

static int gntdev_release(struct inode *inode, struct file *flip)
{
	struct gntdev_priv *priv = flip->private_data;
	struct gntdev_grant_map *map;

	pr_debug("priv %p\n", priv);

	mutex_lock(&priv->lock);
	while (!list_empty(&priv->maps)) {
		map = list_entry(priv->maps.next,
				 struct gntdev_grant_map, next);
		list_del(&map->next);
		gntdev_put_map(NULL /* already removed */, map);
	}
	mutex_unlock(&priv->lock);

#ifdef CONFIG_XEN_GNTDEV_DMABUF
	gntdev_dmabuf_fini(priv->dmabuf_priv);
#endif

	kfree(priv);
	return 0;
}

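/*
 * IOCTL_GNTDEV_MAP_GRANT_REF: allocate a grant map for the references passed
 * in by userspace and return the offset at which it can later be mmap()ed.
 */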
static long gntdev_ioctl_map_grant_ref(struct gntdev_priv *priv,
				       struct ioctl_gntdev_map_grant_ref __user *u)
{
	struct ioctl_gntdev_map_grant_ref op;
	struct gntdev_grant_map *map;
	int err;

	if (copy_from_user(&op, u, sizeof(op)) != 0)
		return -EFAULT;
	pr_debug("priv %p, add %d\n", priv, op.count);
	if (unlikely(gntdev_test_page_count(op.count)))
		return -EINVAL;

	err = -ENOMEM;
	map = gntdev_alloc_map(priv, op.count, 0 /* This is not a dma-buf. */);
	if (!map)
		return err;

	if (copy_from_user(map->grants, &u->refs,
			   sizeof(map->grants[0]) * op.count) != 0) {
		gntdev_put_map(NULL, map);
		return -EFAULT;
	}

	mutex_lock(&priv->lock);
	gntdev_add_map(priv, map);
	op.index = map->index << PAGE_SHIFT;
	mutex_unlock(&priv->lock);

	if (copy_to_user(u, &op, sizeof(op)) != 0)
		return -EFAULT;

	return 0;
}

static long gntdev_ioctl_unmap_grant_ref(struct gntdev_priv *priv,
					 struct ioctl_gntdev_unmap_grant_ref __user *u)
{
	struct ioctl_gntdev_unmap_grant_ref op;
	struct gntdev_grant_map *map;
	int err = -ENOENT;

	if (copy_from_user(&op, u, sizeof(op)) != 0)
		return -EFAULT;
	pr_debug("priv %p, del %d+%d\n", priv, (int)op.index, (int)op.count);

	mutex_lock(&priv->lock);
	map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count);
	if (map) {
		list_del(&map->next);
		err = 0;
	}
	mutex_unlock(&priv->lock);
	if (map)
		gntdev_put_map(priv, map);
	return err;
}

static long gntdev_ioctl_get_offset_for_vaddr(struct gntdev_priv *priv,
					      struct ioctl_gntdev_get_offset_for_vaddr __user *u)
{
	struct ioctl_gntdev_get_offset_for_vaddr op;
	struct vm_area_struct *vma;
	struct gntdev_grant_map *map;
	int rv = -EINVAL;

	if (copy_from_user(&op, u, sizeof(op)) != 0)
		return -EFAULT;
	pr_debug("priv %p, offset for vaddr %lx\n", priv, (unsigned long)op.vaddr);

	down_read(&current->mm->mmap_sem);
	vma = find_vma(current->mm, op.vaddr);
	if (!vma || vma->vm_ops != &gntdev_vmops)
		goto out_unlock;

	map = vma->vm_private_data;
	if (!map)
		goto out_unlock;

	op.offset = map->index << PAGE_SHIFT;
	op.count = map->count;
	rv = 0;

 out_unlock:
	up_read(&current->mm->mmap_sem);

	if (rv == 0 && copy_to_user(u, &op, sizeof(op)) != 0)
		return -EFAULT;
	return rv;
}

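/*
 * IOCTL_GNTDEV_SET_UNMAP_NOTIFY: arrange for a byte to be cleared and/or an
 * event channel to be signalled when the mapping is unmapped.
 */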
static long gntdev_ioctl_notify(struct gntdev_priv *priv, void __user *u)
{
	struct ioctl_gntdev_unmap_notify op;
	struct gntdev_grant_map *map;
	int rc;
	int out_flags;
	unsigned int out_event;

	if (copy_from_user(&op, u, sizeof(op)))
		return -EFAULT;

	if (op.action & ~(UNMAP_NOTIFY_CLEAR_BYTE|UNMAP_NOTIFY_SEND_EVENT))
		return -EINVAL;

	/* We need to grab a reference to the event channel we are going to use
	 * to send the notify before releasing the reference we may already have
	 * (if someone has called this ioctl twice). This is required so that
	 * it is possible to change the clear_byte part of the notification
	 * without disturbing the event channel part, which may now be the last
	 * reference to that event channel.
	 */
	if (op.action & UNMAP_NOTIFY_SEND_EVENT) {
		if (evtchn_get(op.event_channel_port))
			return -EINVAL;
	}

	out_flags = op.action;
	out_event = op.event_channel_port;

	mutex_lock(&priv->lock);

	list_for_each_entry(map, &priv->maps, next) {
		uint64_t begin = map->index << PAGE_SHIFT;
		uint64_t end = (map->index + map->count) << PAGE_SHIFT;
		if (op.index >= begin && op.index < end)
			goto found;
	}
	rc = -ENOENT;
	goto unlock_out;

 found:
	if ((op.action & UNMAP_NOTIFY_CLEAR_BYTE) &&
			(map->flags & GNTMAP_readonly)) {
		rc = -EINVAL;
		goto unlock_out;
	}

	out_flags = map->notify.flags;
	out_event = map->notify.event;

	map->notify.flags = op.action;
	map->notify.addr = op.index - (map->index << PAGE_SHIFT);
	map->notify.event = op.event_channel_port;

	rc = 0;

 unlock_out:
	mutex_unlock(&priv->lock);

	/* Drop the reference to the event channel we did not save in the map */
	if (out_flags & UNMAP_NOTIFY_SEND_EVENT)
		evtchn_put(out_event);

	return rc;
}

#define GNTDEV_COPY_BATCH 16

struct gntdev_copy_batch {
	struct gnttab_copy ops[GNTDEV_COPY_BATCH];
	struct page *pages[GNTDEV_COPY_BATCH];
	s16 __user *status[GNTDEV_COPY_BATCH];
	unsigned int nr_ops;
	unsigned int nr_pages;
};

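/*
 * Pin the single user page containing @virt (writable if @writeable), record
 * it in @batch for a later put_page(), and return the Xen GFN backing it.
 */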
static int gntdev_get_page(struct gntdev_copy_batch *batch, void __user *virt,
			   bool writeable, unsigned long *gfn)
{
	unsigned long addr = (unsigned long)virt;
	struct page *page;
	unsigned long xen_pfn;
	int ret;

	ret = get_user_pages_fast(addr, 1, writeable ? FOLL_WRITE : 0, &page);
	if (ret < 0)
		return ret;

	batch->pages[batch->nr_pages++] = page;

	xen_pfn = page_to_xen_pfn(page) + XEN_PFN_DOWN(addr & ~PAGE_MASK);
	*gfn = pfn_to_gfn(xen_pfn);

	return 0;
}

static void gntdev_put_pages(struct gntdev_copy_batch *batch)
{
	unsigned int i;

	for (i = 0; i < batch->nr_pages; i++)
		put_page(batch->pages[i]);
	batch->nr_pages = 0;
}

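/*
 * Submit all queued copy operations to the hypervisor, release the pinned
 * pages and propagate the first error of each segment back to the
 * user-supplied status field.
 */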
static int gntdev_copy(struct gntdev_copy_batch *batch)
{
	unsigned int i;

	gnttab_batch_copy(batch->ops, batch->nr_ops);
	gntdev_put_pages(batch);

	/*
	 * For each completed op, update the status if the op failed
	 * and all previous ops for the segment were successful.
	 */
	for (i = 0; i < batch->nr_ops; i++) {
		s16 status = batch->ops[i].status;
		s16 old_status;

		if (status == GNTST_okay)
			continue;

		if (__get_user(old_status, batch->status[i]))
			return -EFAULT;

		if (old_status != GNTST_okay)
			continue;

		if (__put_user(status, batch->status[i]))
			return -EFAULT;
	}

	batch->nr_ops = 0;
	return 0;
}

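/*
 * Queue the copy operations for one segment, splitting it at Xen page
 * boundaries on the local side and flushing the batch whenever it fills up.
 */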
static int gntdev_grant_copy_seg(struct gntdev_copy_batch *batch,
				 struct gntdev_grant_copy_segment *seg,
				 s16 __user *status)
{
	uint16_t copied = 0;

	/*
	 * Disallow local -> local copies since there is only space in
	 * batch->pages for one page per-op and this would be a very
	 * expensive memcpy().
	 */
	if (!(seg->flags & (GNTCOPY_source_gref | GNTCOPY_dest_gref)))
		return -EINVAL;

	/* Can't cross page if source/dest is a grant ref. */
	if (seg->flags & GNTCOPY_source_gref) {
		if (seg->source.foreign.offset + seg->len > XEN_PAGE_SIZE)
			return -EINVAL;
	}
	if (seg->flags & GNTCOPY_dest_gref) {
		if (seg->dest.foreign.offset + seg->len > XEN_PAGE_SIZE)
			return -EINVAL;
	}

	if (put_user(GNTST_okay, status))
		return -EFAULT;

	while (copied < seg->len) {
		struct gnttab_copy *op;
		void __user *virt;
		size_t len, off;
		unsigned long gfn;
		int ret;

		if (batch->nr_ops >= GNTDEV_COPY_BATCH) {
			ret = gntdev_copy(batch);
			if (ret < 0)
				return ret;
		}

		len = seg->len - copied;

		op = &batch->ops[batch->nr_ops];
		op->flags = 0;

		if (seg->flags & GNTCOPY_source_gref) {
			op->source.u.ref = seg->source.foreign.ref;
			op->source.domid = seg->source.foreign.domid;
			op->source.offset = seg->source.foreign.offset + copied;
			op->flags |= GNTCOPY_source_gref;
		} else {
			virt = seg->source.virt + copied;
			off = (unsigned long)virt & ~XEN_PAGE_MASK;
			len = min(len, (size_t)XEN_PAGE_SIZE - off);

			ret = gntdev_get_page(batch, virt, false, &gfn);
			if (ret < 0)
				return ret;

			op->source.u.gmfn = gfn;
			op->source.domid = DOMID_SELF;
			op->source.offset = off;
		}

		if (seg->flags & GNTCOPY_dest_gref) {
			op->dest.u.ref = seg->dest.foreign.ref;
			op->dest.domid = seg->dest.foreign.domid;
			op->dest.offset = seg->dest.foreign.offset + copied;
			op->flags |= GNTCOPY_dest_gref;
		} else {
			virt = seg->dest.virt + copied;
			off = (unsigned long)virt & ~XEN_PAGE_MASK;
			len = min(len, (size_t)XEN_PAGE_SIZE - off);

			ret = gntdev_get_page(batch, virt, true, &gfn);
			if (ret < 0)
				return ret;

			op->dest.u.gmfn = gfn;
			op->dest.domid = DOMID_SELF;
			op->dest.offset = off;
		}

		op->len = len;
		copied += len;

		batch->status[batch->nr_ops] = status;
		batch->nr_ops++;
	}

	return 0;
}

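/*
 * IOCTL_GNTDEV_GRANT_COPY: copy data between local buffers and granted pages
 * without mapping the grants.
 */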
static long gntdev_ioctl_grant_copy(struct gntdev_priv *priv, void __user *u)
{
	struct ioctl_gntdev_grant_copy copy;
	struct gntdev_copy_batch batch;
	unsigned int i;
	int ret = 0;

	if (copy_from_user(&copy, u, sizeof(copy)))
		return -EFAULT;

	batch.nr_ops = 0;
	batch.nr_pages = 0;

	for (i = 0; i < copy.count; i++) {
		struct gntdev_grant_copy_segment seg;

		if (copy_from_user(&seg, &copy.segments[i], sizeof(seg))) {
			ret = -EFAULT;
			goto out;
		}

		ret = gntdev_grant_copy_seg(&batch, &seg, &copy.segments[i].status);
		if (ret < 0)
			goto out;

		cond_resched();
	}
	if (batch.nr_ops)
		ret = gntdev_copy(&batch);
	return ret;

  out:
	gntdev_put_pages(&batch);
	return ret;
}

static long gntdev_ioctl(struct file *flip,
			 unsigned int cmd, unsigned long arg)
{
	struct gntdev_priv *priv = flip->private_data;
	void __user *ptr = (void __user *)arg;

	switch (cmd) {
	case IOCTL_GNTDEV_MAP_GRANT_REF:
		return gntdev_ioctl_map_grant_ref(priv, ptr);

	case IOCTL_GNTDEV_UNMAP_GRANT_REF:
		return gntdev_ioctl_unmap_grant_ref(priv, ptr);

	case IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR:
		return gntdev_ioctl_get_offset_for_vaddr(priv, ptr);

	case IOCTL_GNTDEV_SET_UNMAP_NOTIFY:
		return gntdev_ioctl_notify(priv, ptr);

	case IOCTL_GNTDEV_GRANT_COPY:
		return gntdev_ioctl_grant_copy(priv, ptr);

#ifdef CONFIG_XEN_GNTDEV_DMABUF
	case IOCTL_GNTDEV_DMABUF_EXP_FROM_REFS:
		return gntdev_ioctl_dmabuf_exp_from_refs(priv, use_ptemod, ptr);

	case IOCTL_GNTDEV_DMABUF_EXP_WAIT_RELEASED:
		return gntdev_ioctl_dmabuf_exp_wait_released(priv, ptr);

	case IOCTL_GNTDEV_DMABUF_IMP_TO_REFS:
		return gntdev_ioctl_dmabuf_imp_to_refs(priv, ptr);

	case IOCTL_GNTDEV_DMABUF_IMP_RELEASE:
		return gntdev_ioctl_dmabuf_imp_release(priv, ptr);
#endif

	default:
		pr_debug("priv %p, unknown cmd %x\n", priv, cmd);
		return -ENOIOCTLCMD;
	}

	return 0;
}

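/*
 * mmap() a range that was set up by IOCTL_GNTDEV_MAP_GRANT_REF.  With
 * use_ptemod the grants are mapped through the user page tables and tracked
 * by an mmu interval notifier; otherwise the grant-mapped pages are inserted
 * into the VMA with vm_map_pages_zero().
 */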
static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
{
	struct gntdev_priv *priv = flip->private_data;
	int index = vma->vm_pgoff;
	int count = vma_pages(vma);
	struct gntdev_grant_map *map;
	int err = -EINVAL;

	if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED))
		return -EINVAL;

	pr_debug("map %d+%d at %lx (pgoff %lx)\n",
			index, count, vma->vm_start, vma->vm_pgoff);

	mutex_lock(&priv->lock);
	map = gntdev_find_map_index(priv, index, count);
	if (!map)
		goto unlock_out;
	if (use_ptemod && map->vma)
		goto unlock_out;
	refcount_inc(&map->users);

	vma->vm_ops = &gntdev_vmops;

	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP | VM_MIXEDMAP;

	if (use_ptemod)
		vma->vm_flags |= VM_DONTCOPY;

	vma->vm_private_data = map;
	if (map->flags) {
		if ((vma->vm_flags & VM_WRITE) &&
				(map->flags & GNTMAP_readonly))
			goto out_unlock_put;
	} else {
		map->flags = GNTMAP_host_map;
		if (!(vma->vm_flags & VM_WRITE))
			map->flags |= GNTMAP_readonly;
	}

	if (use_ptemod) {
		map->vma = vma;
		err = mmu_interval_notifier_insert_locked(
			&map->notifier, vma->vm_mm, vma->vm_start,
			vma->vm_end - vma->vm_start, &gntdev_mmu_ops);
		if (err)
			goto out_unlock_put;
	}
	mutex_unlock(&priv->lock);

	/*
	 * gntdev takes the address of the PTE in find_grant_ptes() and passes
	 * it to the hypervisor in gntdev_map_grant_pages(). The purpose of
	 * the notifier is to prevent the hypervisor pointer to the PTE from
	 * going stale.
	 *
	 * Since this vma's mappings can't be touched without the mmap_sem,
	 * and we are holding it now, there is no need for the notifier_range
	 * locking pattern.
	 */
	mmu_interval_read_begin(&map->notifier);

	if (use_ptemod) {
		map->pages_vm_start = vma->vm_start;
		err = apply_to_page_range(vma->vm_mm, vma->vm_start,
					  vma->vm_end - vma->vm_start,
					  find_grant_ptes, map);
		if (err) {
			pr_warn("find_grant_ptes() failure.\n");
			goto out_put_map;
		}
	}

	err = gntdev_map_grant_pages(map);
	if (err)
		goto out_put_map;

	if (!use_ptemod) {
		err = vm_map_pages_zero(vma, map->pages, map->count);
		if (err)
			goto out_put_map;
	} else {
#ifdef CONFIG_X86
		/*
		 * If the PTEs were not made special by the grant map
		 * hypercall, do so here.
		 *
		 * This is racy since the mapping is already visible
		 * to userspace but userspace should be well-behaved
		 * enough to not touch it until the mmap() call
		 * returns.
		 */
		if (!xen_feature(XENFEAT_gnttab_map_avail_bits)) {
			apply_to_page_range(vma->vm_mm, vma->vm_start,
					    vma->vm_end - vma->vm_start,
					    set_grant_ptes_as_special, NULL);
		}
#endif
	}

	return 0;

unlock_out:
	mutex_unlock(&priv->lock);
	return err;

out_unlock_put:
	mutex_unlock(&priv->lock);
out_put_map:
	if (use_ptemod) {
		unmap_grant_pages(map, 0, map->count);
		if (map->vma) {
			mmu_interval_notifier_remove(&map->notifier);
			map->vma = NULL;
		}
	}
	gntdev_put_map(priv, map);
	return err;
}

static const struct file_operations gntdev_fops = {
	.owner = THIS_MODULE,
	.open = gntdev_open,
	.release = gntdev_release,
	.mmap = gntdev_mmap,
	.unlocked_ioctl = gntdev_ioctl
};

static struct miscdevice gntdev_miscdev = {
	.minor        = MISC_DYNAMIC_MINOR,
	.name         = "xen/gntdev",
	.fops         = &gntdev_fops,
};

/* ------------------------------------------------------------------ */

static int __init gntdev_init(void)
{
	int err;

	if (!xen_domain())
		return -ENODEV;

	use_ptemod = !xen_feature(XENFEAT_auto_translated_physmap);

	err = misc_register(&gntdev_miscdev);
	if (err != 0) {
		pr_err("Could not register gntdev device\n");
		return err;
	}
	return 0;
}

static void __exit gntdev_exit(void)
{
	misc_deregister(&gntdev_miscdev);
}

module_init(gntdev_init);
module_exit(gntdev_exit);

/* ------------------------------------------------------------------ */