提交 30514dec 编写于 作者: C Chunming Zhou 提交者: Alex Deucher

drm/amdgpu: fix dependency issue

The problem is that executing the jobs in the right order doesn't give you the right result
because consecutive jobs executed on the same engine are pipelined.
In other words, job B does its buffer read before job A has written its result.
Signed-off-by: Chunming Zhou <David1.Zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
上级 cb3696fd
...@@ -1129,6 +1129,7 @@ struct amdgpu_job { ...@@ -1129,6 +1129,7 @@ struct amdgpu_job {
void *owner; void *owner;
uint64_t fence_ctx; /* the fence_context this job uses */ uint64_t fence_ctx; /* the fence_context this job uses */
bool vm_needs_flush; bool vm_needs_flush;
bool need_pipeline_sync;
unsigned vm_id; unsigned vm_id;
uint64_t vm_pd_addr; uint64_t vm_pd_addr;
uint32_t gds_base, gds_size; uint32_t gds_base, gds_size;
......
...@@ -160,6 +160,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, ...@@ -160,6 +160,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
dev_err(adev->dev, "scheduling IB failed (%d).\n", r); dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
return r; return r;
} }
if (ring->funcs->emit_pipeline_sync && job && job->need_pipeline_sync)
amdgpu_ring_emit_pipeline_sync(ring);
if (vm) { if (vm) {
r = amdgpu_vm_flush(ring, job); r = amdgpu_vm_flush(ring, job);
......
...@@ -57,6 +57,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, ...@@ -57,6 +57,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
(*job)->vm = vm; (*job)->vm = vm;
(*job)->ibs = (void *)&(*job)[1]; (*job)->ibs = (void *)&(*job)[1];
(*job)->num_ibs = num_ibs; (*job)->num_ibs = num_ibs;
(*job)->need_pipeline_sync = false;
amdgpu_sync_create(&(*job)->sync); amdgpu_sync_create(&(*job)->sync);
...@@ -152,6 +153,9 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job) ...@@ -152,6 +153,9 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
fence = amdgpu_sync_get_fence(&job->sync); fence = amdgpu_sync_get_fence(&job->sync);
} }
if (amd_sched_dependency_optimized(fence, sched_job->s_entity))
job->need_pipeline_sync = true;
return fence; return fence;
} }
......
...@@ -614,7 +614,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) ...@@ -614,7 +614,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
if (ring->funcs->init_cond_exec) if (ring->funcs->init_cond_exec)
patch_offset = amdgpu_ring_init_cond_exec(ring); patch_offset = amdgpu_ring_init_cond_exec(ring);
if (ring->funcs->emit_pipeline_sync) if (ring->funcs->emit_pipeline_sync && !job->need_pipeline_sync)
amdgpu_ring_emit_pipeline_sync(ring); amdgpu_ring_emit_pipeline_sync(ring);
if (ring->funcs->emit_vm_flush && vm_flush_needed) { if (ring->funcs->emit_vm_flush && vm_flush_needed) {
......
...@@ -236,6 +236,23 @@ static void amd_sched_entity_clear_dep(struct dma_fence *f, struct dma_fence_cb ...@@ -236,6 +236,23 @@ static void amd_sched_entity_clear_dep(struct dma_fence *f, struct dma_fence_cb
dma_fence_put(f); dma_fence_put(f);
} }
/**
 * amd_sched_dependency_optimized - classify a dependency fence for an entity
 * @fence: dependency fence to inspect; may be NULL
 * @entity: scheduler entity the dependency belongs to
 *
 * Returns true when @fence has not signaled yet and either carries the same
 * fence context as @entity, or converts through to_amd_sched_fence() to a
 * scheduler fence whose scheduler is the one @entity runs on. Returns false
 * for a NULL or already-signaled fence and for every other fence.
 */
bool amd_sched_dependency_optimized(struct dma_fence* fence,
				    struct amd_sched_entity *entity)
{
	struct amd_gpu_scheduler *own_sched = entity->sched;
	struct amd_sched_fence *sched_fence;

	/* No dependency at all. */
	if (!fence)
		return false;

	/* The dependency has already completed. */
	if (dma_fence_is_signaled(fence))
		return false;

	/* Fence was produced in this entity's own fence context. */
	if (entity->fence_context == fence->context)
		return true;

	/*
	 * NOTE(review): to_amd_sched_fence() presumably yields NULL when the
	 * fence is not a scheduler fence - confirm against its definition.
	 */
	sched_fence = to_amd_sched_fence(fence);

	return sched_fence && sched_fence->sched == own_sched;
}
static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity) static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity)
{ {
struct amd_gpu_scheduler *sched = entity->sched; struct amd_gpu_scheduler *sched = entity->sched;
......
...@@ -158,4 +158,6 @@ int amd_sched_job_init(struct amd_sched_job *job, ...@@ -158,4 +158,6 @@ int amd_sched_job_init(struct amd_sched_job *job,
void *owner); void *owner);
void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched); void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched);
void amd_sched_job_recovery(struct amd_gpu_scheduler *sched); void amd_sched_job_recovery(struct amd_gpu_scheduler *sched);
/*
 * Returns true when @fence is non-NULL, not yet signaled, and either shares
 * @entity's fence context or converts (via to_amd_sched_fence()) to a
 * scheduler fence whose scheduler matches @entity's; false otherwise.
 */
bool amd_sched_dependency_optimized(struct dma_fence* fence,
struct amd_sched_entity *entity);
#endif #endif
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册