/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * based on nouveau_prime.c
 *
 * Authors: Alex Deucher
 */

/**
 * DOC: PRIME Buffer Sharing
 *
 * The following callback implementations are used for :ref:`sharing GEM buffer
 * objects between different devices via PRIME <prime_buffer_sharing>`.
 */

A
Alex Deucher 已提交
34
#include "amdgpu.h"
35
#include "amdgpu_display.h"
36
#include "amdgpu_gem.h"
37
#include "amdgpu_dma_buf.h"
A
Alex Deucher 已提交
38 39
#include <drm/amdgpu_drm.h>
#include <linux/dma-buf.h>
40
#include <linux/dma-fence-array.h>
41
#include <linux/pci-p2pdma.h>
A
Alex Deucher 已提交
42

43 44
/**
 * amdgpu_gem_prime_vmap - &dma_buf_ops.vmap implementation
45
 * @obj: GEM BO
46
 *
47
 * Sets up an in-kernel virtual mapping of the BO's memory.
48 49 50 51
 *
 * Returns:
 * The virtual address of the mapping or an error pointer.
 */
A
Alex Deucher 已提交
52 53 54 55 56 57 58 59 60 61 62 63 64
void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj)
{
	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
	int ret;

	ret = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.num_pages,
			  &bo->dma_buf_vmap);
	if (ret)
		return ERR_PTR(ret);

	return bo->dma_buf_vmap.virtual;
}

65 66
/**
 * amdgpu_gem_prime_vunmap - &dma_buf_ops.vunmap implementation
67 68
 * @obj: GEM BO
 * @vaddr: Virtual address (unused)
69
 *
70
 * Tears down the in-kernel virtual mapping of the BO's memory.
71
 */
A
Alex Deucher 已提交
72 73 74 75 76 77 78
void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
{
	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);

	ttm_bo_kunmap(&bo->dma_buf_vmap);
}

79 80
/**
 * amdgpu_gem_prime_mmap - &drm_driver.gem_prime_mmap implementation
81 82
 * @obj: GEM BO
 * @vma: Virtual memory area
83
 *
84
 * Sets up a userspace mapping of the BO's memory in the given
85 86 87
 * virtual memory area.
 *
 * Returns:
88
 * 0 on success or a negative error code on failure.
89
 */
90 91
int amdgpu_gem_prime_mmap(struct drm_gem_object *obj,
			  struct vm_area_struct *vma)
92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124
{
	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	unsigned asize = amdgpu_bo_size(bo);
	int ret;

	if (!vma->vm_file)
		return -ENODEV;

	if (adev == NULL)
		return -ENODEV;

	/* Check for valid size. */
	if (asize < vma->vm_end - vma->vm_start)
		return -EINVAL;

	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
	    (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) {
		return -EPERM;
	}
	vma->vm_pgoff += amdgpu_bo_mmap_offset(bo) >> PAGE_SHIFT;

	/* prime mmap does not need to check access, so allow here */
	ret = drm_vma_node_allow(&obj->vma_node, vma->vm_file->private_data);
	if (ret)
		return ret;

	ret = ttm_bo_mmap(vma->vm_file, vma, &adev->mman.bdev);
	drm_vma_node_revoke(&obj->vma_node, vma->vm_file->private_data);

	return ret;
}

125
static int
126
__dma_resv_make_exclusive(struct dma_resv *obj)
127 128 129 130 131
{
	struct dma_fence **fences;
	unsigned int count;
	int r;

132
	if (!dma_resv_get_list(obj)) /* no shared fences to convert */
133 134
		return 0;

135
	r = dma_resv_get_fences_rcu(obj, NULL, &count, &fences);
136 137 138 139 140 141
	if (r)
		return r;

	if (count == 0) {
		/* Now that was unexpected. */
	} else if (count == 1) {
142
		dma_resv_add_excl_fence(obj, fences[0]);
143 144 145 146 147 148 149 150 151 152 153
		dma_fence_put(fences[0]);
		kfree(fences);
	} else {
		struct dma_fence_array *array;

		array = dma_fence_array_create(count, fences,
					       dma_fence_context_alloc(1), 0,
					       false);
		if (!array)
			goto err_fences_put;

154
		dma_resv_add_excl_fence(obj, &array->base);
155 156 157 158 159 160 161 162 163 164 165 166
		dma_fence_put(&array->base);
	}

	return 0;

err_fences_put:
	while (count--)
		dma_fence_put(fences[count]);
	kfree(fences);
	return -ENOMEM;
}

167
/**
168 169 170 171 172 173 174 175 176 177 178 179 180 181 182
 * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
 *
 * @dmabuf: DMA-buf where we attach to
 * @attach: attachment to add
 *
 * Add the attachment as user to the exported DMA-buf.
 */
static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
				 struct dma_buf_attachment *attach)
{
	struct drm_gem_object *obj = dmabuf->priv;
	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	int r;

183 184 185
	if (pci_p2pdma_distance_many(adev->pdev, &attach->dev, 1, true) < 0)
		attach->peer2peer = false;

186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228
	if (attach->dev->driver == adev->dev->driver)
		return 0;

	r = amdgpu_bo_reserve(bo, false);
	if (unlikely(r != 0))
		return r;

	/*
	 * We only create shared fences for internal use, but importers
	 * of the dmabuf rely on exclusive fences for implicitly
	 * tracking write hazards. As any of the current fences may
	 * correspond to a write, we need to convert all existing
	 * fences on the reservation object into a single exclusive
	 * fence.
	 */
	r = __dma_resv_make_exclusive(bo->tbo.base.resv);
	if (r)
		return r;

	bo->prime_shared_count++;
	amdgpu_bo_unreserve(bo);
	return 0;
}

/**
 * amdgpu_dma_buf_detach - &dma_buf_ops.detach implementation
 *
 * @dmabuf: DMA-buf where we remove the attachment from
 * @attach: the attachment to remove
 *
 * Called when an attachment is removed from the DMA-buf. Decrements the
 * BO's prime_shared_count for importers bound to a different driver, never
 * going below zero.
 */
static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf,
				  struct dma_buf_attachment *attach)
{
	struct drm_gem_object *gobj = dmabuf->priv;
	struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);

	/* Attachments of the same driver were never counted. */
	if (attach->dev->driver == adev->dev->driver)
		return;

	if (abo->prime_shared_count)
		abo->prime_shared_count--;
}

229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259
/**
 * amdgpu_dma_buf_pin - &dma_buf_ops.pin implementation
 *
 * @attach: attachment to pin down
 *
 * Pin the BO which is backing the DMA-buf into the GTT domain so that it
 * can't move any more.
 *
 * Returns:
 * The result of amdgpu_bo_pin().
 */
static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach)
{
	struct drm_gem_object *gobj = attach->dmabuf->priv;

	return amdgpu_bo_pin(gem_to_amdgpu_bo(gobj), AMDGPU_GEM_DOMAIN_GTT);
}

/**
 * amdgpu_dma_buf_unpin - &dma_buf_ops.unpin implementation
 *
 * @attach: attachment to unpin
 *
 * Drop the pin on the BO backing the DMA-buf so it becomes movable again.
 */
static void amdgpu_dma_buf_unpin(struct dma_buf_attachment *attach)
{
	struct drm_gem_object *gobj = attach->dmabuf->priv;

	amdgpu_bo_unpin(gem_to_amdgpu_bo(gobj));
}

260 261
/**
 * amdgpu_dma_buf_map - &dma_buf_ops.map_dma_buf implementation
262
 * @attach: DMA-buf attachment
263
 * @dir: DMA direction
264 265 266 267 268 269
 *
 * Makes sure that the shared DMA buffer can be accessed by the target device.
 * For now, simply pins it to the GTT domain, where it should be accessible by
 * all DMA devices.
 *
 * Returns:
270 271
 * sg_table filled with the DMA addresses to use or ERR_PRT with negative error
 * code.
272
 */
273 274
static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
					   enum dma_data_direction dir)
A
Alex Deucher 已提交
275
{
276
	struct dma_buf *dma_buf = attach->dmabuf;
277
	struct drm_gem_object *obj = dma_buf->priv;
A
Alex Deucher 已提交
278
	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
279
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
280
	struct sg_table *sgt;
281
	long r;
A
Alex Deucher 已提交
282

283
	if (!bo->pin_count) {
284
		/* move buffer into GTT or VRAM */
285
		struct ttm_operation_ctx ctx = { false, false };
286 287 288 289 290 291 292 293
		unsigned domains = AMDGPU_GEM_DOMAIN_GTT;

		if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM &&
		    attach->peer2peer) {
			bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
			domains |= AMDGPU_GEM_DOMAIN_VRAM;
		}
		amdgpu_bo_placement_from_domain(bo, domains);
294 295 296 297 298 299 300 301
		r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
		if (r)
			return ERR_PTR(r);

	} else if (!(amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type) &
		     AMDGPU_GEM_DOMAIN_GTT)) {
		return ERR_PTR(-EBUSY);
	}
302

303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323
	switch (bo->tbo.mem.mem_type) {
	case TTM_PL_TT:
		sgt = drm_prime_pages_to_sg(bo->tbo.ttm->pages,
					    bo->tbo.num_pages);
		if (IS_ERR(sgt))
			return sgt;

		if (!dma_map_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir,
				      DMA_ATTR_SKIP_CPU_SYNC))
			goto error_free;
		break;

	case TTM_PL_VRAM:
		r = amdgpu_vram_mgr_alloc_sgt(adev, &bo->tbo.mem, attach->dev,
					      dir, &sgt);
		if (r)
			return ERR_PTR(r);
		break;
	default:
		return ERR_PTR(-EINVAL);
	}
324

325 326 327 328 329
	return sgt;

error_free:
	sg_free_table(sgt);
	kfree(sgt);
330
	return ERR_PTR(-EBUSY);
A
Alex Deucher 已提交
331 332
}

333
/**
334
 * amdgpu_dma_buf_unmap - &dma_buf_ops.unmap_dma_buf implementation
335
 * @attach: DMA-buf attachment
336 337
 * @sgt: sg_table to unmap
 * @dir: DMA direction
338 339
 *
 * This is called when a shared DMA buffer no longer needs to be accessible by
340
 * another device. For now, simply unpins the buffer from GTT.
341
 */
342 343 344
static void amdgpu_dma_buf_unmap(struct dma_buf_attachment *attach,
				 struct sg_table *sgt,
				 enum dma_data_direction dir)
A
Alex Deucher 已提交
345
{
346 347 348 349 350 351 352 353 354 355 356 357
	struct dma_buf *dma_buf = attach->dmabuf;
	struct drm_gem_object *obj = dma_buf->priv;
	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);

	if (sgt->sgl->page_link) {
		dma_unmap_sg(attach->dev, sgt->sgl, sgt->nents, dir);
		sg_free_table(sgt);
		kfree(sgt);
	} else {
		amdgpu_vram_mgr_free_sgt(adev, attach->dev, dir, sgt);
	}
A
Alex Deucher 已提交
358 359
}

360
/**
361
 * amdgpu_dma_buf_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation
362 363
 * @dma_buf: Shared DMA buffer
 * @direction: Direction of DMA transfer
364 365 366 367 368 369
 *
 * This is called before CPU access to the shared DMA buffer's memory. If it's
 * a read access, the buffer is moved to the GTT domain if possible, for optimal
 * CPU read performance.
 *
 * Returns:
370
 * 0 on success or a negative error code on failure.
371
 */
372 373
static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
					   enum dma_data_direction direction)
374 375 376 377
{
	struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	struct ttm_operation_ctx ctx = { true, false };
378
	u32 domain = amdgpu_display_supported_domains(adev, bo->flags);
379 380 381 382 383 384 385 386 387 388 389 390 391
	int ret;
	bool reads = (direction == DMA_BIDIRECTIONAL ||
		      direction == DMA_FROM_DEVICE);

	if (!reads || !(domain & AMDGPU_GEM_DOMAIN_GTT))
		return 0;

	/* move to gtt */
	ret = amdgpu_bo_reserve(bo, false);
	if (unlikely(ret != 0))
		return ret;

	if (!bo->pin_count && (bo->allowed_domains & AMDGPU_GEM_DOMAIN_GTT)) {
392
		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
393 394 395 396 397 398 399
		ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
	}

	amdgpu_bo_unreserve(bo);
	return ret;
}

400
const struct dma_buf_ops amdgpu_dmabuf_ops = {
401 402
	.attach = amdgpu_dma_buf_attach,
	.detach = amdgpu_dma_buf_detach,
403 404
	.pin = amdgpu_dma_buf_pin,
	.unpin = amdgpu_dma_buf_unpin,
405 406
	.map_dma_buf = amdgpu_dma_buf_map,
	.unmap_dma_buf = amdgpu_dma_buf_unmap,
407
	.release = drm_gem_dmabuf_release,
408
	.begin_cpu_access = amdgpu_dma_buf_begin_cpu_access,
409 410 411 412 413
	.mmap = drm_gem_dmabuf_mmap,
	.vmap = drm_gem_dmabuf_vmap,
	.vunmap = drm_gem_dmabuf_vunmap,
};

414 415
/**
 * amdgpu_gem_prime_export - &drm_driver.gem_prime_export implementation
416 417
 * @gobj: GEM BO
 * @flags: Flags such as DRM_CLOEXEC and DRM_RDWR.
418
 *
419
 * The main work is done by the &drm_gem_prime_export helper.
420 421
 *
 * Returns:
422
 * Shared DMA buffer representing the GEM BO from the given device.
423
 */
424
struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj,
A
Alex Deucher 已提交
425 426 427
					int flags)
{
	struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
428
	struct dma_buf *buf;
A
Alex Deucher 已提交
429

430 431
	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
	    bo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
A
Alex Deucher 已提交
432 433
		return ERR_PTR(-EPERM);

434
	buf = drm_gem_prime_export(gobj, flags);
435
	if (!IS_ERR(buf))
436 437
		buf->ops = &amdgpu_dmabuf_ops;

438
	return buf;
A
Alex Deucher 已提交
439
}
440

441
/**
442 443
 * amdgpu_dma_buf_create_obj - create BO for DMA-buf import
 *
444
 * @dev: DRM device
445
 * @dma_buf: DMA-buf
446
 *
447
 * Creates an empty SG BO for DMA-buf import.
448 449 450 451 452
 *
 * Returns:
 * A new GEM BO of the given DRM device, representing the memory
 * described by the given DMA-buf attachment and scatter/gather table.
 */
453 454
static struct drm_gem_object *
amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
455
{
456
	struct dma_resv *resv = dma_buf->resv;
457 458 459 460 461 462
	struct amdgpu_device *adev = dev->dev_private;
	struct amdgpu_bo *bo;
	struct amdgpu_bo_param bp;
	int ret;

	memset(&bp, 0, sizeof(bp));
463
	bp.size = dma_buf->size;
464 465 466 467 468
	bp.byte_align = PAGE_SIZE;
	bp.domain = AMDGPU_GEM_DOMAIN_CPU;
	bp.flags = 0;
	bp.type = ttm_bo_type_sg;
	bp.resv = resv;
469
	dma_resv_lock(resv, NULL);
470 471 472 473 474 475
	ret = amdgpu_bo_create(adev, &bp, &bo);
	if (ret)
		goto error;

	bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
	bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
476
	if (dma_buf->ops != &amdgpu_dmabuf_ops)
477 478
		bo->prime_shared_count = 1;

479
	dma_resv_unlock(resv);
480
	return &bo->tbo.base;
481 482

error:
483
	dma_resv_unlock(resv);
484 485 486
	return ERR_PTR(ret);
}

487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549
/**
 * amdgpu_dma_buf_move_notify - &attach.move_notify implementation
 *
 * @attach: the DMA-buf attachment
 *
 * Invalidate the DMA-buf attachment, making sure that we re-create the
 * mapping before the next use. The imported BO is validated against an
 * empty placement and afterwards the page tables of every VM the BO is
 * linked to are updated, as far as the VM's reservation lock can be taken.
 */
static void
amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach)
{
	struct drm_gem_object *obj = attach->importer_priv;
	struct ww_acquire_ctx *ticket = dma_resv_locking_ctx(obj->resv);
	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
	struct ttm_operation_ctx ctx = { false, false };
	/* Deliberately empty placement, used for the validation below. */
	struct ttm_placement placement = {};
	struct amdgpu_vm_bo_base *bo_base;
	int r;

	/* Nothing to invalidate while the BO is in the system memory type. */
	if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
		return;

	/* NOTE(review): presumably this moves the BO back to system memory
	 * and drops its current mapping - confirm against TTM.
	 */
	r = ttm_bo_validate(&bo->tbo, &placement, &ctx);
	if (r) {
		DRM_ERROR("Failed to invalidate DMA-buf import (%d))\n", r);
		return;
	}

	/* Walk all VMs this BO is linked to and refresh their page tables. */
	for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
		struct amdgpu_vm *vm = bo_base->vm;
		struct dma_resv *resv = vm->root.base.bo->tbo.base.resv;

		if (ticket) {
			/* When we get an error here it means that somebody
			 * else is holding the VM lock and updating page tables
			 * So we can just continue here.
			 */
			r = dma_resv_lock(resv, ticket);
			if (r)
				continue;

		} else {
			/* TODO: This is more problematic and we actually need
			 * to allow page tables updates without holding the
			 * lock.
			 */
			if (!dma_resv_trylock(resv))
				continue;
		}

		/* The VM's reservation lock is held from here until unlock. */
		r = amdgpu_vm_clear_freed(adev, vm, NULL);
		if (!r)
			r = amdgpu_vm_handle_moved(adev, vm);

		/* -EBUSY is not reported; all other errors are logged only. */
		if (r && r != -EBUSY)
			DRM_ERROR("Failed to invalidate VM page tables (%d))\n",
				  r);

		dma_resv_unlock(resv);
	}
}

550
static const struct dma_buf_attach_ops amdgpu_dma_buf_attach_ops = {
551
	.allow_peer2peer = true,
552
	.move_notify = amdgpu_dma_buf_move_notify
553 554
};

555 556 557 558 559
/**
 * amdgpu_gem_prime_import - &drm_driver.gem_prime_import implementation
 * @dev: DRM device
 * @dma_buf: Shared DMA buffer
 *
560
 * Import a dma_buf into a the driver and potentially create a new GEM object.
561 562
 *
 * Returns:
563
 * GEM BO representing the shared DMA buffer for the given device.
564
 */
565
struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
566
					       struct dma_buf *dma_buf)
567
{
568
	struct dma_buf_attachment *attach;
569 570 571 572 573 574 575 576 577 578 579 580 581 582
	struct drm_gem_object *obj;

	if (dma_buf->ops == &amdgpu_dmabuf_ops) {
		obj = dma_buf->priv;
		if (obj->dev == dev) {
			/*
			 * Importing dmabuf exported from out own gem increases
			 * refcount on gem itself instead of f_count of dmabuf.
			 */
			drm_gem_object_get(obj);
			return obj;
		}
	}

583 584 585 586
	obj = amdgpu_dma_buf_create_obj(dev, dma_buf);
	if (IS_ERR(obj))
		return obj;

587
	attach = dma_buf_dynamic_attach(dma_buf, dev->dev,
588
					&amdgpu_dma_buf_attach_ops, obj);
589
	if (IS_ERR(attach)) {
590
		drm_gem_object_put(obj);
591 592 593 594 595 596
		return ERR_CAST(attach);
	}

	get_dma_buf(dma_buf);
	obj->import_attach = attach;
	return obj;
597
}