diff --git a/drivers/gpu/drm/scheduler/gpu_scheduler.c b/drivers/gpu/drm/scheduler/gpu_scheduler.c index 1b733229201edfd4578ae58fae4552b1b3a36f6a..a70c7f7fd6fece9da31c3e138a2b3d0eba5ffe9f 100644 --- a/drivers/gpu/drm/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/scheduler/gpu_scheduler.c @@ -552,24 +552,28 @@ static void drm_sched_job_finish(struct work_struct *work) finish_work); struct drm_gpu_scheduler *sched = s_job->sched; - /* remove job from ring_mirror_list */ - spin_lock(&sched->job_list_lock); - list_del_init(&s_job->node); - if (sched->timeout != MAX_SCHEDULE_TIMEOUT) { - struct drm_sched_job *next; - - spin_unlock(&sched->job_list_lock); - cancel_delayed_work_sync(&s_job->work_tdr); - spin_lock(&sched->job_list_lock); + /* + * Canceling the timeout without removing our job from the ring mirror + * list is safe, as we will only end up in this worker if our jobs + * finished fence has been signaled. So even if some another worker + * manages to find this job as the next job in the list, the fence + * signaled check below will prevent the timeout to be restarted. + */ + cancel_delayed_work_sync(&s_job->work_tdr); - /* queue TDR for next job */ - next = list_first_entry_or_null(&sched->ring_mirror_list, - struct drm_sched_job, node); + spin_lock(&sched->job_list_lock); + /* queue TDR for next job */ + if (sched->timeout != MAX_SCHEDULE_TIMEOUT && + !list_is_last(&s_job->node, &sched->ring_mirror_list)) { + struct drm_sched_job *next = list_next_entry(s_job, node); - if (next) + if (!dma_fence_is_signaled(&next->s_fence->finished)) schedule_delayed_work(&next->work_tdr, sched->timeout); } + /* remove job from ring_mirror_list */ + list_del(&s_job->node); spin_unlock(&sched->job_list_lock); + dma_fence_put(&s_job->s_fence->finished); sched->ops->free_job(s_job); }