From 16f0013157bf8c95d10b9360491e3c920f85641e Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 3 Aug 2022 14:45:45 -0400 Subject: [PATCH] drm/amdkfd: Allocate doorbells only when needed Only allocate doorbells when the first queue is created on a GPU or the doorbells need to be mapped into CPU or GPU virtual address space. This avoids allocating doorbells unnecessarily and can allow more processes to use KFD on multi-GPU systems. Signed-off-by: Felix Kuehling Reviewed-by: Kent Russell Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 13 +++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c | 9 +++++++++ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 5 ----- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index dc774ddf3445..664e8b5d82c0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -327,6 +327,12 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, goto err_bind_process; } + if (!pdd->doorbell_index && + kfd_alloc_process_doorbells(dev, &pdd->doorbell_index) < 0) { + err = -ENOMEM; + goto err_alloc_doorbells; + } + /* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work * on unmapped queues for usermode queue oversubscription (no aggregated doorbell) */ @@ -404,6 +410,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, if (wptr_bo) amdgpu_amdkfd_free_gtt_mem(dev->adev, wptr_bo); err_wptr_map_gart: +err_alloc_doorbells: err_bind_process: err_pdd: mutex_unlock(&p->mutex); @@ -1092,6 +1099,10 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, goto err_unlock; } offset = kfd_get_process_doorbells(pdd); + if (!offset) { + err = -ENOMEM; + goto err_unlock; + } } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) { if (args->size != PAGE_SIZE) { err = -EINVAL; @@ -2173,6 +2184,8 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd, return -EINVAL; offset = kfd_get_process_doorbells(pdd); + if (!offset) + return -ENOMEM; } else if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) { /* MMIO BOs need remapped bus address */ if (bo_bucket->size != PAGE_SIZE) { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c index cb3d2ccc5100..b33798f89ef0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_doorbell.c @@ -157,6 +157,8 @@ int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process, /* Calculate physical address of doorbell */ address = kfd_get_process_doorbells(pdd); + if (!address) + return -ENOMEM; vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP; @@ -275,6 +277,13 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd) phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd) { + if (!pdd->doorbell_index) { + int r = kfd_alloc_process_doorbells(pdd->dev, + &pdd->doorbell_index); + if (r) + return 0; + } + return pdd->dev->doorbell_base + pdd->doorbell_index * kfd_doorbell_process_slice(pdd->dev); } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 6c83a519b3a1..951b63677248 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1499,11 +1499,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, if (!pdd) return NULL; - if (kfd_alloc_process_doorbells(dev, &pdd->doorbell_index) < 0) { - pr_err("Failed to alloc doorbell for pdd\n"); - goto err_free_pdd; - } - if (init_doorbell_bitmap(&pdd->qpd, dev)) { pr_err("Failed to init doorbell for process\n"); goto err_free_pdd; -- GitLab