提交 738f64cc 编写于 作者: R Roger.He 提交者: Alex Deucher

drm/amdgpu: extend lock range for race condition when gpu reset

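Extend the range covered by gtt_list_lock to handle the following case:
1. A task has performed a GART bind/unbind but has not yet updated adev->gtt_list.
2. A GPU reset happens at that moment; GTT recovery only restores the entries that are in adev->gtt_list.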
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Roger.He <Hongbo.He@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
上级 455a7bc2
...@@ -554,7 +554,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev); ...@@ -554,7 +554,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev);
void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev); void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev);
int amdgpu_gart_init(struct amdgpu_device *adev); int amdgpu_gart_init(struct amdgpu_device *adev);
void amdgpu_gart_fini(struct amdgpu_device *adev); void amdgpu_gart_fini(struct amdgpu_device *adev);
void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
int pages); int pages);
int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
int pages, struct page **pagelist, int pages, struct page **pagelist,
......
...@@ -224,8 +224,9 @@ void amdgpu_gart_table_vram_free(struct amdgpu_device *adev) ...@@ -224,8 +224,9 @@ void amdgpu_gart_table_vram_free(struct amdgpu_device *adev)
* *
* Unbinds the requested pages from the gart page table and * Unbinds the requested pages from the gart page table and
* replaces them with the dummy page (all asics). * replaces them with the dummy page (all asics).
* Returns 0 for success, -EINVAL for failure.
*/ */
void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
int pages) int pages)
{ {
unsigned t; unsigned t;
...@@ -237,7 +238,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, ...@@ -237,7 +238,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
if (!adev->gart.ready) { if (!adev->gart.ready) {
WARN(1, "trying to unbind memory from uninitialized GART !\n"); WARN(1, "trying to unbind memory from uninitialized GART !\n");
return; return -EINVAL;
} }
t = offset / AMDGPU_GPU_PAGE_SIZE; t = offset / AMDGPU_GPU_PAGE_SIZE;
...@@ -258,6 +259,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, ...@@ -258,6 +259,7 @@ void amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
} }
mb(); mb();
amdgpu_gart_flush_gpu_tlb(adev, 0); amdgpu_gart_flush_gpu_tlb(adev, 0);
return 0;
} }
/** /**
......
...@@ -745,6 +745,7 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem) ...@@ -745,6 +745,7 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem)
return r; return r;
} }
spin_lock(&gtt->adev->gtt_list_lock);
flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem); flags = amdgpu_ttm_tt_pte_flags(gtt->adev, ttm, bo_mem);
gtt->offset = (u64)bo_mem->start << PAGE_SHIFT; gtt->offset = (u64)bo_mem->start << PAGE_SHIFT;
r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages, r = amdgpu_gart_bind(gtt->adev, gtt->offset, ttm->num_pages,
...@@ -753,12 +754,13 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem) ...@@ -753,12 +754,13 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem)
if (r) { if (r) {
DRM_ERROR("failed to bind %lu pages at 0x%08llX\n", DRM_ERROR("failed to bind %lu pages at 0x%08llX\n",
ttm->num_pages, gtt->offset); ttm->num_pages, gtt->offset);
return r; goto error_gart_bind;
} }
spin_lock(&gtt->adev->gtt_list_lock);
list_add_tail(&gtt->list, &gtt->adev->gtt_list); list_add_tail(&gtt->list, &gtt->adev->gtt_list);
error_gart_bind:
spin_unlock(&gtt->adev->gtt_list_lock); spin_unlock(&gtt->adev->gtt_list_lock);
return 0; return r;
} }
int amdgpu_ttm_recover_gart(struct amdgpu_device *adev) int amdgpu_ttm_recover_gart(struct amdgpu_device *adev)
...@@ -789,6 +791,7 @@ int amdgpu_ttm_recover_gart(struct amdgpu_device *adev) ...@@ -789,6 +791,7 @@ int amdgpu_ttm_recover_gart(struct amdgpu_device *adev)
static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
{ {
struct amdgpu_ttm_tt *gtt = (void *)ttm; struct amdgpu_ttm_tt *gtt = (void *)ttm;
int r;
if (gtt->userptr) if (gtt->userptr)
amdgpu_ttm_tt_unpin_userptr(ttm); amdgpu_ttm_tt_unpin_userptr(ttm);
...@@ -797,14 +800,17 @@ static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) ...@@ -797,14 +800,17 @@ static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
return 0; return 0;
/* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */ /* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
if (gtt->adev->gart.ready)
amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages);
spin_lock(&gtt->adev->gtt_list_lock); spin_lock(&gtt->adev->gtt_list_lock);
r = amdgpu_gart_unbind(gtt->adev, gtt->offset, ttm->num_pages);
if (r) {
DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n",
gtt->ttm.ttm.num_pages, gtt->offset);
goto error_unbind;
}
list_del_init(&gtt->list); list_del_init(&gtt->list);
error_unbind:
spin_unlock(&gtt->adev->gtt_list_lock); spin_unlock(&gtt->adev->gtt_list_lock);
return r;
return 0;
} }
static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm) static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册