提交 59d7115d 编写于 作者: M Mukul Joshi 提交者: Alex Deucher

drm/amdkfd: Move process doorbell allocation into kfd device

Move doorbell allocation for a process into kfd device and
allocate doorbell space in each PDD during process creation.
Currently, KFD manages its own doorbell space but for some
devices, amdgpu would allocate the complete doorbell
space instead of leaving a chunk of doorbell space for KFD to
manage. In a system with mix of such devices, KFD would need
to request process doorbell space based on the type of device,
either from amdgpu or from its own doorbell space.
Signed-off-by: NMukul Joshi <mukul.joshi@amd.com>
Reviewed-by: NFelix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: NAlex Deucher <alexander.deucher@amd.com>
上级 b7b6c385
......@@ -1291,18 +1291,6 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
return -EINVAL;
}
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
if (args->size != kfd_doorbell_process_slice(dev))
return -EINVAL;
offset = kfd_get_process_doorbells(dev, p);
} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
if (args->size != PAGE_SIZE)
return -EINVAL;
offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
if (!offset)
return -ENOMEM;
}
mutex_lock(&p->mutex);
pdd = kfd_bind_process_to_device(dev, p);
......@@ -1311,6 +1299,24 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
goto err_unlock;
}
if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
if (args->size != kfd_doorbell_process_slice(dev)) {
err = -EINVAL;
goto err_unlock;
}
offset = kfd_get_process_doorbells(pdd);
} else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP) {
if (args->size != PAGE_SIZE) {
err = -EINVAL;
goto err_unlock;
}
offset = amdgpu_amdkfd_get_mmio_remap_phys_addr(dev->kgd);
if (!offset) {
err = -ENOMEM;
goto err_unlock;
}
}
err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
dev->kgd, args->va_addr, args->size,
pdd->vm, (struct kgd_mem **) &mem, &offset,
......
......@@ -583,6 +583,8 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
atomic_set(&kfd->sram_ecc_flag, 0);
ida_init(&kfd->doorbell_ida);
return kfd;
}
......@@ -798,6 +800,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
kfd_interrupt_exit(kfd);
kfd_topology_remove_device(kfd);
kfd_doorbell_fini(kfd);
ida_destroy(&kfd->doorbell_ida);
kfd_gtt_sa_fini(kfd);
amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
if (kfd->gws)
......
......@@ -191,9 +191,8 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
}
q->properties.doorbell_off =
kfd_get_doorbell_dw_offset_in_bar(dev, q->process,
kfd_get_doorbell_dw_offset_in_bar(dev, qpd_to_pdd(qpd),
q->doorbell_id);
return 0;
}
......
......@@ -31,9 +31,6 @@
* kernel queues using the first doorbell page reserved for the kernel.
*/
static DEFINE_IDA(doorbell_ida);
static unsigned int max_doorbell_slices;
/*
* Each device exposes a doorbell aperture, a PCI MMIO aperture that
* receives 32-bit writes that are passed to queues as wptr values.
......@@ -84,9 +81,9 @@ int kfd_doorbell_init(struct kfd_dev *kfd)
else
return -ENOSPC;
if (!max_doorbell_slices ||
doorbell_process_limit < max_doorbell_slices)
max_doorbell_slices = doorbell_process_limit;
if (!kfd->max_doorbell_slices ||
doorbell_process_limit < kfd->max_doorbell_slices)
kfd->max_doorbell_slices = doorbell_process_limit;
kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address +
doorbell_start_offset;
......@@ -130,6 +127,7 @@ int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
struct vm_area_struct *vma)
{
phys_addr_t address;
struct kfd_process_device *pdd;
/*
* For simplicitly we only allow mapping of the entire doorbell
......@@ -138,9 +136,12 @@ int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev))
return -EINVAL;
/* Calculate physical address of doorbell */
address = kfd_get_process_doorbells(dev, process);
pdd = kfd_get_process_device_data(dev, process);
if (!pdd)
return -EINVAL;
/* Calculate physical address of doorbell */
address = kfd_get_process_doorbells(pdd);
vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE |
VM_DONTDUMP | VM_PFNMAP;
......@@ -226,7 +227,7 @@ void write_kernel_doorbell64(void __iomem *db, u64 value)
}
unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
struct kfd_process *process,
struct kfd_process_device *pdd,
unsigned int doorbell_id)
{
/*
......@@ -236,7 +237,7 @@ unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
* units regardless of the ASIC-dependent doorbell size.
*/
return kfd->doorbell_base_dw_offset +
process->doorbell_index
pdd->doorbell_index
* kfd_doorbell_process_slice(kfd) / sizeof(u32) +
doorbell_id * kfd->device_info->doorbell_size / sizeof(u32);
}
......@@ -251,25 +252,24 @@ uint64_t kfd_get_number_elems(struct kfd_dev *kfd)
}
phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
struct kfd_process *process)
phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd)
{
return dev->doorbell_base +
process->doorbell_index * kfd_doorbell_process_slice(dev);
return pdd->dev->doorbell_base +
pdd->doorbell_index * kfd_doorbell_process_slice(pdd->dev);
}
int kfd_alloc_process_doorbells(struct kfd_process *process)
int kfd_alloc_process_doorbells(struct kfd_dev *kfd, unsigned int *doorbell_index)
{
int r = ida_simple_get(&doorbell_ida, 1, max_doorbell_slices,
int r = ida_simple_get(&kfd->doorbell_ida, 1, kfd->max_doorbell_slices,
GFP_KERNEL);
if (r > 0)
process->doorbell_index = r;
*doorbell_index = r;
return r;
}
void kfd_free_process_doorbells(struct kfd_process *process)
void kfd_free_process_doorbells(struct kfd_dev *kfd, unsigned int doorbell_index)
{
if (process->doorbell_index)
ida_simple_remove(&doorbell_ida, process->doorbell_index);
if (doorbell_index)
ida_simple_remove(&kfd->doorbell_ida, doorbell_index);
}
......@@ -314,6 +314,9 @@ struct kfd_dev {
spinlock_t smi_lock;
uint32_t reset_seq_num;
struct ida doorbell_ida;
unsigned int max_doorbell_slices;
};
enum kfd_mempool {
......@@ -699,6 +702,7 @@ struct kfd_process_device {
struct attribute attr_evict;
struct kobject *kobj_stats;
unsigned int doorbell_index;
};
#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)
......@@ -736,7 +740,6 @@ struct kfd_process {
struct mmu_notifier mmu_notifier;
uint16_t pasid;
unsigned int doorbell_index;
/*
* List of kfd_process_device structures,
......@@ -869,13 +872,13 @@ u32 read_kernel_doorbell(u32 __iomem *db);
void write_kernel_doorbell(void __iomem *db, u32 value);
void write_kernel_doorbell64(void __iomem *db, u64 value);
unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd,
struct kfd_process *process,
struct kfd_process_device *pdd,
unsigned int doorbell_id);
phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
struct kfd_process *process);
int kfd_alloc_process_doorbells(struct kfd_process *process);
void kfd_free_process_doorbells(struct kfd_process *process);
phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd);
int kfd_alloc_process_doorbells(struct kfd_dev *kfd,
unsigned int *doorbell_index);
void kfd_free_process_doorbells(struct kfd_dev *kfd,
unsigned int doorbell_index);
/* GTT Sub-Allocator */
int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
......
......@@ -876,6 +876,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
kfree(pdd->qpd.doorbell_bitmap);
idr_destroy(&pdd->alloc_idr);
kfd_free_process_doorbells(pdd->dev, pdd->doorbell_index);
/*
* before destroying pdd, make sure to report availability
* for auto suspend
......@@ -932,8 +934,6 @@ static void kfd_process_wq_release(struct work_struct *work)
kfd_event_free_process(p);
kfd_pasid_free(p->pasid);
kfd_free_process_doorbells(p);
mutex_destroy(&p->mutex);
put_task_struct(p->lead_thread);
......@@ -1111,9 +1111,6 @@ static struct kfd_process *create_process(const struct task_struct *thread)
if (process->pasid == 0)
goto err_alloc_pasid;
if (kfd_alloc_process_doorbells(process) < 0)
goto err_alloc_doorbells;
err = pqm_init(&process->pqm, process);
if (err != 0)
goto err_process_pqm_init;
......@@ -1141,8 +1138,6 @@ static struct kfd_process *create_process(const struct task_struct *thread)
err_init_apertures:
pqm_uninit(&process->pqm);
err_process_pqm_init:
kfd_free_process_doorbells(process);
err_alloc_doorbells:
kfd_pasid_free(process->pasid);
err_alloc_pasid:
mutex_destroy(&process->mutex);
......@@ -1205,10 +1200,14 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
if (!pdd)
return NULL;
if (kfd_alloc_process_doorbells(dev, &pdd->doorbell_index) < 0) {
pr_err("Failed to alloc doorbell for pdd\n");
goto err_free_pdd;
}
if (init_doorbell_bitmap(&pdd->qpd, dev)) {
pr_err("Failed to init doorbell for process\n");
kfree(pdd);
return NULL;
goto err_free_pdd;
}
pdd->dev = dev;
......@@ -1231,6 +1230,10 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
idr_init(&pdd->alloc_idr);
return pdd;
err_free_pdd:
kfree(pdd);
return NULL;
}
/**
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册