提交 ee1782c3 编写于 作者: C Christian König 提交者: Alex Deucher

drm/amdgpu: keep the PTs validation list in the VM v2

This avoids allocating it on the fly.

v2: fix grammar in comment
Signed-off-by: NChristian König <christian.koenig@amd.com>
Reviewed-by: NAlex Deucher <alexander.deucher@amd.com>
Reviewed-by: NChunming Zhou <david1.zhou@amd.com>
上级 56467ebf
...@@ -917,8 +917,8 @@ struct amdgpu_ring { ...@@ -917,8 +917,8 @@ struct amdgpu_ring {
#define AMDGPU_VM_FAULT_STOP_ALWAYS 2 #define AMDGPU_VM_FAULT_STOP_ALWAYS 2
struct amdgpu_vm_pt { struct amdgpu_vm_pt {
struct amdgpu_bo *bo; struct amdgpu_bo_list_entry entry;
uint64_t addr; uint64_t addr;
}; };
struct amdgpu_vm_id { struct amdgpu_vm_id {
...@@ -983,8 +983,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); ...@@ -983,8 +983,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
struct list_head *validated, struct list_head *validated,
struct amdgpu_bo_list_entry *entry); struct amdgpu_bo_list_entry *entry);
struct amdgpu_bo_list_entry *amdgpu_vm_get_pt_bos(struct amdgpu_vm *vm, void amdgpu_vm_get_pt_bos(struct amdgpu_vm *vm, struct list_head *duplicates);
struct list_head *duplicates);
int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring, int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
struct amdgpu_sync *sync); struct amdgpu_sync *sync);
void amdgpu_vm_flush(struct amdgpu_ring *ring, void amdgpu_vm_flush(struct amdgpu_ring *ring,
...@@ -1255,7 +1254,6 @@ struct amdgpu_cs_parser { ...@@ -1255,7 +1254,6 @@ struct amdgpu_cs_parser {
struct amdgpu_cs_chunk *chunks; struct amdgpu_cs_chunk *chunks;
/* relocations */ /* relocations */
struct amdgpu_bo_list_entry vm_pd; struct amdgpu_bo_list_entry vm_pd;
struct amdgpu_bo_list_entry *vm_bos;
struct list_head validated; struct list_head validated;
struct fence *fence; struct fence *fence;
......
...@@ -396,11 +396,7 @@ static int amdgpu_cs_parser_relocs(struct amdgpu_cs_parser *p) ...@@ -396,11 +396,7 @@ static int amdgpu_cs_parser_relocs(struct amdgpu_cs_parser *p)
if (unlikely(r != 0)) if (unlikely(r != 0))
goto error_reserve; goto error_reserve;
p->vm_bos = amdgpu_vm_get_pt_bos(&fpriv->vm, &duplicates); amdgpu_vm_get_pt_bos(&fpriv->vm, &duplicates);
if (!p->vm_bos) {
r = -ENOMEM;
goto error_validate;
}
r = amdgpu_cs_list_validate(p->adev, &fpriv->vm, &p->validated); r = amdgpu_cs_list_validate(p->adev, &fpriv->vm, &p->validated);
if (r) if (r)
...@@ -483,7 +479,6 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo ...@@ -483,7 +479,6 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
if (parser->bo_list) if (parser->bo_list)
amdgpu_bo_list_put(parser->bo_list); amdgpu_bo_list_put(parser->bo_list);
drm_free_large(parser->vm_bos);
for (i = 0; i < parser->nchunks; i++) for (i = 0; i < parser->nchunks; i++)
drm_free_large(parser->chunks[i].kdata); drm_free_large(parser->chunks[i].kdata);
kfree(parser->chunks); kfree(parser->chunks);
......
...@@ -447,7 +447,6 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, ...@@ -447,7 +447,6 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va, uint32_t operation) struct amdgpu_bo_va *bo_va, uint32_t operation)
{ {
struct ttm_validate_buffer tv, *entry; struct ttm_validate_buffer tv, *entry;
struct amdgpu_bo_list_entry *vm_bos;
struct amdgpu_bo_list_entry vm_pd; struct amdgpu_bo_list_entry vm_pd;
struct ww_acquire_ctx ticket; struct ww_acquire_ctx ticket;
struct list_head list, duplicates; struct list_head list, duplicates;
...@@ -468,12 +467,7 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, ...@@ -468,12 +467,7 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
if (r) if (r)
goto error_print; goto error_print;
vm_bos = amdgpu_vm_get_pt_bos(bo_va->vm, &duplicates); amdgpu_vm_get_pt_bos(bo_va->vm, &duplicates);
if (!vm_bos) {
r = -ENOMEM;
goto error_unreserve;
}
list_for_each_entry(entry, &list, head) { list_for_each_entry(entry, &list, head) {
domain = amdgpu_mem_type_to_domain(entry->bo->mem.mem_type); domain = amdgpu_mem_type_to_domain(entry->bo->mem.mem_type);
/* if anything is swapped out don't swap it in here, /* if anything is swapped out don't swap it in here,
...@@ -494,7 +488,6 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev, ...@@ -494,7 +488,6 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
error_unreserve: error_unreserve:
ttm_eu_backoff_reservation(&ticket, &list); ttm_eu_backoff_reservation(&ticket, &list);
drm_free_large(vm_bos);
error_print: error_print:
if (r && r != -ERESTARTSYS) if (r && r != -ERESTARTSYS)
......
...@@ -98,40 +98,27 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, ...@@ -98,40 +98,27 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
} }
/** /**
* amdgpu_vm_get_bos - add the vm BOs to a validation list * amdgpu_vm_get_bos - add the vm BOs to a duplicates list
* *
* @vm: vm providing the BOs * @vm: vm providing the BOs
* @duplicates: head of duplicates list * @duplicates: head of duplicates list
* *
* Add the page directory to the list of BOs to * Add the page directory to the BO duplicates list
* validate for command submission (cayman+). * for command submission.
*/ */
struct amdgpu_bo_list_entry *amdgpu_vm_get_pt_bos(struct amdgpu_vm *vm, void amdgpu_vm_get_pt_bos(struct amdgpu_vm *vm, struct list_head *duplicates)
struct list_head *duplicates)
{ {
struct amdgpu_bo_list_entry *list; unsigned i;
unsigned i, idx;
list = drm_malloc_ab(vm->max_pde_used + 1,
sizeof(struct amdgpu_bo_list_entry));
if (!list)
return NULL;
/* add the vm page table to the list */ /* add the vm page table to the list */
for (i = 0, idx = 0; i <= vm->max_pde_used; i++) { for (i = 0; i <= vm->max_pde_used; ++i) {
if (!vm->page_tables[i].bo) struct amdgpu_bo_list_entry *entry = &vm->page_tables[i].entry;
if (!entry->robj)
continue; continue;
list[idx].robj = vm->page_tables[i].bo; list_add(&entry->tv.head, duplicates);
list[idx].prefered_domains = AMDGPU_GEM_DOMAIN_VRAM;
list[idx].allowed_domains = AMDGPU_GEM_DOMAIN_VRAM;
list[idx].priority = 0;
list[idx].tv.bo = &list[idx].robj->tbo;
list[idx].tv.shared = true;
list_add(&list[idx++].tv.head, duplicates);
} }
return list;
} }
/** /**
...@@ -474,7 +461,7 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, ...@@ -474,7 +461,7 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
/* walk over the address space and update the page directory */ /* walk over the address space and update the page directory */
for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) { for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
struct amdgpu_bo *bo = vm->page_tables[pt_idx].bo; struct amdgpu_bo *bo = vm->page_tables[pt_idx].entry.robj;
uint64_t pde, pt; uint64_t pde, pt;
if (bo == NULL) if (bo == NULL)
...@@ -651,7 +638,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_device *adev, ...@@ -651,7 +638,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_device *adev,
/* walk over the address space and update the page tables */ /* walk over the address space and update the page tables */
for (addr = start; addr < end; ) { for (addr = start; addr < end; ) {
uint64_t pt_idx = addr >> amdgpu_vm_block_size; uint64_t pt_idx = addr >> amdgpu_vm_block_size;
struct amdgpu_bo *pt = vm->page_tables[pt_idx].bo; struct amdgpu_bo *pt = vm->page_tables[pt_idx].entry.robj;
unsigned nptes; unsigned nptes;
uint64_t pte; uint64_t pte;
int r; int r;
...@@ -1083,9 +1070,11 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, ...@@ -1083,9 +1070,11 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
/* walk over the address space and allocate the page tables */ /* walk over the address space and allocate the page tables */
for (pt_idx = saddr; pt_idx <= eaddr; ++pt_idx) { for (pt_idx = saddr; pt_idx <= eaddr; ++pt_idx) {
struct reservation_object *resv = vm->page_directory->tbo.resv; struct reservation_object *resv = vm->page_directory->tbo.resv;
struct amdgpu_bo_list_entry *entry;
struct amdgpu_bo *pt; struct amdgpu_bo *pt;
if (vm->page_tables[pt_idx].bo) entry = &vm->page_tables[pt_idx].entry;
if (entry->robj)
continue; continue;
r = amdgpu_bo_create(adev, AMDGPU_VM_PTE_COUNT * 8, r = amdgpu_bo_create(adev, AMDGPU_VM_PTE_COUNT * 8,
...@@ -1102,8 +1091,13 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev, ...@@ -1102,8 +1091,13 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
goto error_free; goto error_free;
} }
entry->robj = pt;
entry->prefered_domains = AMDGPU_GEM_DOMAIN_VRAM;
entry->allowed_domains = AMDGPU_GEM_DOMAIN_VRAM;
entry->priority = 0;
entry->tv.bo = &entry->robj->tbo;
entry->tv.shared = true;
vm->page_tables[pt_idx].addr = 0; vm->page_tables[pt_idx].addr = 0;
vm->page_tables[pt_idx].bo = pt;
} }
return 0; return 0;
...@@ -1334,7 +1328,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) ...@@ -1334,7 +1328,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
} }
for (i = 0; i < amdgpu_vm_num_pdes(adev); i++) for (i = 0; i < amdgpu_vm_num_pdes(adev); i++)
amdgpu_bo_unref(&vm->page_tables[i].bo); amdgpu_bo_unref(&vm->page_tables[i].entry.robj);
kfree(vm->page_tables); kfree(vm->page_tables);
amdgpu_bo_unref(&vm->page_directory); amdgpu_bo_unref(&vm->page_directory);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册