drm/radeon: Impl. new gtt allocate/free functions

This patch adds the implementation of the gtt interface functions. The allocate function will allocate a single bo, pin and map it to kernel memory. It will return the gpu address and cpu ptr as arguments. v2: The bulk of the allocations in the GART is for MQDs. MQDs represent active user-mode queues, which are on the current runlist. It is important to remember that active queues doesn't necessarily mean scheduled/running queues, especially if there is over-subscription of queues or more than a single HSA process. Because the scheduling of the user-mode queues is done by the CP firmware, amdkfd doesn't have any indication if the queue is scheduled or not. If the CP will try to schedule a queue, and its MQD is not present, this will probably stuck the CP permanently, as it will load garbage from the GART (the address of the MQD is given to the CP inside the runlist packet). In addition, there are a couple of small allocations which also should always be pinned - runlist packets (2 packets) and HPDs. runlist packets can be quite large, depending on number of processes and queues. This new allocate function represents the short/mid-term solution of limiting the total memory consumption to around 4MB by default. The long-term solution is to create a mechanism through which radeon/ttm can ask amdkfd to clear GART/VRAM memory due to memory pressure. Then, amdkfd will preempt the running queues and wait until the memory pressure is over. After that, amdkfd will reschedule the queues. Signed-off-by: N Oded Gabbay <oded.gabbay@amd.com> Reviewed-by: N Alexey Skidanov <Alexey.skidanov@amd.com> Reviewed-by: N Alex Deucher <alexander.deucher@amd.com>

drm/radeon: Impl. new gtt allocate/free functions
This patch adds the implementation of the gtt interface functions. The allocate function will allocate a single bo, pin and map it to kernel memory. It will return the gpu address and cpu ptr as arguments. v2: The bulk of the allocations in the GART is for MQDs. MQDs represent active user-mode queues, which are on the current runlist. It is important to remember that active queues doesn't necessarily mean scheduled/running queues, especially if there is over-subscription of queues or more than a single HSA process. Because the scheduling of the user-mode queues is done by the CP firmware, amdkfd doesn't have any indication if the queue is scheduled or not. If the CP will try to schedule a queue, and its MQD is not present, this will probably stuck the CP permanently, as it will load garbage from the GART (the address of the MQD is given to the CP inside the runlist packet). In addition, there are a couple of small allocations which also should always be pinned - runlist packets (2 packets) and HPDs. runlist packets can be quite large, depending on number of processes and queues. This new allocate function represents the short/mid-term solution of limiting the total memory consumption to around 4MB by default. The long-term solution is to create a mechanism through which radeon/ttm can ask amdkfd to clear GART/VRAM memory due to memory pressure. Then, amdkfd will preempt the running queues and wait until the memory pressure is over. After that, amdkfd will reschedule the queues. Signed-off-by: N Oded Gabbay <oded.gabbay@amd.com> Reviewed-by: N Alexey Skidanov <Alexey.skidanov@amd.com> Reviewed-by: N Alex Deucher <alexander.deucher@amd.com>
ceae881b · Oded Gabbay · e27ade73 · ceae881b
隐藏空白更改
内联并排

Showing with 85 addition and 0 deletion

drivers/gpu/drm/radeon/radeon_kfd.c drivers/gpu/drm/radeon/radeon_kfd.c +85 -0

未找到文件。
--- a/drivers/gpu/drm/radeon/radeon_kfd.c
+++ b/drivers/gpu/drm/radeon/radeon_kfd.c
@@ -37,6 +37,8 @@ struct kgd_mem {
 	struct radeon_sa_bo *sa_bo;
 	uint64_t gpu_addr;
 	void *ptr;
+	struct radeon_bo *bo;
+	void *cpu_ptr;
 };

 static int init_sa_manager(struct kgd_dev *kgd, unsigned int size);
@@ -47,6 +49,12 @@ static int allocate_mem(struct kgd_dev *kgd, size_t size, size_t alignment,

 static void free_mem(struct kgd_dev *kgd, struct kgd_mem *mem);

+static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
+			void **mem_obj, uint64_t *gpu_addr,
+			void **cpu_ptr);
+
+static void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
+
 static uint64_t get_vmem_size(struct kgd_dev *kgd);
 static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);

@@ -87,6 +95,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.fini_sa_manager = fini_sa_manager,
 	.allocate_mem = allocate_mem,
 	.free_mem = free_mem,
+	.init_gtt_mem_allocation = alloc_gtt_mem,
+	.free_gtt_mem = free_gtt_mem,
 	.get_vmem_size = get_vmem_size,
 	.get_gpu_clock_counter = get_gpu_clock_counter,
 	.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
@@ -272,6 +282,81 @@ static void free_mem(struct kgd_dev *kgd, struct kgd_mem *mem)
 	kfree(mem);
 }

+static int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
+			void **mem_obj, uint64_t *gpu_addr,
+			void **cpu_ptr)
+{
+	struct radeon_device *rdev = (struct radeon_device *)kgd;
+	struct kgd_mem **mem = (struct kgd_mem **) mem_obj;
+	int r;
+
+	BUG_ON(kgd == NULL);
+	BUG_ON(gpu_addr == NULL);
+	BUG_ON(cpu_ptr == NULL);
+
+	*mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL);
+	if ((*mem) == NULL)
+		return -ENOMEM;
+
+	r = radeon_bo_create(rdev, size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_GTT,
+				RADEON_GEM_GTT_WC, NULL, NULL, &(*mem)->bo);
+	if (r) {
+		dev_err(rdev->dev,
+			"failed to allocate BO for amdkfd (%d)\n", r);
+		return r;
+	}
+
+	/* map the buffer */
+	r = radeon_bo_reserve((*mem)->bo, true);
+	if (r) {
+		dev_err(rdev->dev, "(%d) failed to reserve bo for amdkfd\n", r);
+		goto allocate_mem_reserve_bo_failed;
+	}
+
+	r = radeon_bo_pin((*mem)->bo, RADEON_GEM_DOMAIN_GTT,
+				&(*mem)->gpu_addr);
+	if (r) {
+		dev_err(rdev->dev, "(%d) failed to pin bo for amdkfd\n", r);
+		goto allocate_mem_pin_bo_failed;
+	}
+	*gpu_addr = (*mem)->gpu_addr;
+
+	r = radeon_bo_kmap((*mem)->bo, &(*mem)->cpu_ptr);
+	if (r) {
+		dev_err(rdev->dev,
+			"(%d) failed to map bo to kernel for amdkfd\n", r);
+		goto allocate_mem_kmap_bo_failed;
+	}
+	*cpu_ptr = (*mem)->cpu_ptr;
+
+	radeon_bo_unreserve((*mem)->bo);
+
+	return 0;
+
+allocate_mem_kmap_bo_failed:
+	radeon_bo_unpin((*mem)->bo);
+allocate_mem_pin_bo_failed:
+	radeon_bo_unreserve((*mem)->bo);
+allocate_mem_reserve_bo_failed:
+	radeon_bo_unref(&(*mem)->bo);
+
+	return r;
+}
+
+static void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
+{
+	struct kgd_mem *mem = (struct kgd_mem *) mem_obj;
+
+	BUG_ON(mem == NULL);
+
+	radeon_bo_reserve(mem->bo, true);
+	radeon_bo_kunmap(mem->bo);
+	radeon_bo_unpin(mem->bo);
+	radeon_bo_unreserve(mem->bo);
+	radeon_bo_unref(&(mem->bo));
+	kfree(mem);
+}
+
 static uint64_t get_vmem_size(struct kgd_dev *kgd)
 {
 	struct radeon_device *rdev = (struct radeon_device *)kgd;