diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h
index 74311fc53e2f9d373770a5aba344d622ad96ca32..7d6eb82eeb913636eb590e25121e752044f93fe2 100644
--- a/drivers/gpu/drm/i915/i915_request.h
+++ b/drivers/gpu/drm/i915/i915_request.h
@@ -44,8 +44,8 @@ struct intel_wait {
 };
 
 struct intel_signal_node {
-	struct rb_node node;
 	struct intel_wait wait;
+	struct list_head link;
 };
 
 struct i915_dependency {
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index 094f010908b8b7e39eed52243a7b10b9ecff3e95..03bbc1dfbc51bd4a12e0c5dea4f8750c4f784c50 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -340,7 +340,8 @@ static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b,
 	lockdep_assert_held(&b->rb_lock);
 	GEM_BUG_ON(b->irq_wait == wait);
 
-	/* This request is completed, so remove it from the tree, mark it as
+	/*
+	 * This request is completed, so remove it from the tree, mark it as
 	 * complete, and *then* wake up the associated task. N.B. when the
 	 * task wakes up, it will find the empty rb_node, discern that it
 	 * has already been removed from the tree and skip the serialisation
@@ -351,7 +352,8 @@ static inline void __intel_breadcrumbs_finish(struct intel_breadcrumbs *b,
 	rb_erase(&wait->node, &b->waiters);
 	RB_CLEAR_NODE(&wait->node);
 
-	wake_up_process(wait->tsk); /* implicit smp_wmb() */
+	if (wait->tsk->state != TASK_RUNNING)
+		wake_up_process(wait->tsk); /* implicit smp_wmb() */
 }
 
 static inline void __intel_breadcrumbs_next(struct intel_engine_cs *engine,
@@ -592,23 +594,6 @@ void intel_engine_remove_wait(struct intel_engine_cs *engine,
 	spin_unlock_irq(&b->rb_lock);
 }
 
-static bool signal_complete(const struct i915_request *request)
-{
-	if (!request)
-		return false;
-
-	/*
-	 * Carefully check if the request is complete, giving time for the
-	 * seqno to be visible or if the GPU hung.
-	 */
-	return __i915_request_irq_complete(request);
-}
-
-static struct i915_request *to_signaler(struct rb_node *rb)
-{
-	return rb_entry(rb, struct i915_request, signaling.node);
-}
-
 static void signaler_set_rtpriority(void)
 {
 	struct sched_param param = { .sched_priority = 1 };
@@ -616,78 +601,26 @@ static void signaler_set_rtpriority(void)
 	sched_setscheduler_nocheck(current, SCHED_FIFO, &param);
 }
 
-static void __intel_engine_remove_signal(struct intel_engine_cs *engine,
-					 struct i915_request *request)
-{
-	struct intel_breadcrumbs *b = &engine->breadcrumbs;
-
-	lockdep_assert_held(&b->rb_lock);
-
-	/*
-	 * Wake up all other completed waiters and select the
-	 * next bottom-half for the next user interrupt.
-	 */
-	__intel_engine_remove_wait(engine, &request->signaling.wait);
-
-	/*
-	 * Find the next oldest signal. Note that as we have
-	 * not been holding the lock, another client may
-	 * have installed an even older signal than the one
-	 * we just completed - so double check we are still
-	 * the oldest before picking the next one.
-	 */
-	if (request->signaling.wait.seqno) {
-		if (request == rcu_access_pointer(b->first_signal)) {
-			struct rb_node *rb = rb_next(&request->signaling.node);
-			rcu_assign_pointer(b->first_signal,
-					   rb ? to_signaler(rb) : NULL);
-		}
-
-		rb_erase(&request->signaling.node, &b->signals);
-		request->signaling.wait.seqno = 0;
-	}
-}
-
-static struct i915_request *
-get_first_signal_rcu(struct intel_breadcrumbs *b)
-{
-	/*
-	 * See the big warnings for i915_gem_active_get_rcu() and similarly
-	 * for dma_fence_get_rcu_safe() that explain the intricacies involved
-	 * here with defeating CPU/compiler speculation and enforcing
-	 * the required memory barriers.
-	 */
-	do {
-		struct i915_request *request;
-
-		request = rcu_dereference(b->first_signal);
-		if (request)
-			request = i915_request_get_rcu(request);
-
-		barrier();
-
-		if (!request || request == rcu_access_pointer(b->first_signal))
-			return rcu_pointer_handoff(request);
-
-		i915_request_put(request);
-	} while (1);
-}
-
 static int intel_breadcrumbs_signaler(void *arg)
 {
 	struct intel_engine_cs *engine = arg;
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
-	struct i915_request *request;
+	struct i915_request *rq, *n;
 
 	/* Install ourselves with high priority to reduce signalling latency */
 	signaler_set_rtpriority();
 
 	do {
 		bool do_schedule = true;
+		LIST_HEAD(list);
+		u32 seqno;
 
 		set_current_state(TASK_INTERRUPTIBLE);
+		if (list_empty(&b->signals))
+			goto sleep;
 
-		/* We are either woken up by the interrupt bottom-half,
+		/*
+		 * We are either woken up by the interrupt bottom-half,
 		 * or by a client adding a new signaller. In both cases,
 		 * the GPU seqno may have advanced beyond our oldest signal.
 		 * If it has, propagate the signal, remove the waiter and
@@ -695,25 +628,45 @@ static int intel_breadcrumbs_signaler(void *arg)
 		 * need to wait for a new interrupt from the GPU or for
 		 * a new client.
 		 */
-		rcu_read_lock();
-		request = get_first_signal_rcu(b);
-		rcu_read_unlock();
-		if (signal_complete(request)) {
-			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
-				      &request->fence.flags)) {
-				local_bh_disable();
-				dma_fence_signal(&request->fence);
-				GEM_BUG_ON(!i915_request_completed(request));
-				local_bh_enable(); /* kick start the tasklets */
-			}
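+		/*
+		 * Sample the last breadcrumb written by the GPU once per
+		 * pass; every signal on the (seqno ordered) list that it
+		 * has passed is complete and can be collected in a single
+		 * sweep under the lock.
+		 */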
+		seqno = intel_engine_get_seqno(engine);
+
+		spin_lock_irq(&b->rb_lock);
+		list_for_each_entry_safe(rq, n, &b->signals, signaling.link) {
+			u32 this = rq->signaling.wait.seqno;
+
+			GEM_BUG_ON(!rq->signaling.wait.seqno);
 
-			if (READ_ONCE(request->signaling.wait.seqno)) {
-				spin_lock_irq(&b->rb_lock);
-				__intel_engine_remove_signal(engine, request);
-				spin_unlock_irq(&b->rb_lock);
+			if (!i915_seqno_passed(seqno, this))
+				break;
+
+			if (likely(this == i915_request_global_seqno(rq))) {
+				__intel_engine_remove_wait(engine,
+							   &rq->signaling.wait);
+
+				rq->signaling.wait.seqno = 0;
+				__list_del_entry(&rq->signaling.link);
+
+				if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+					      &rq->fence.flags)) {
+					list_add_tail(&rq->signaling.link,
+						      &list);
+					i915_request_get(rq);
+				}
+			}
+		}
+		spin_unlock_irq(&b->rb_lock);
+
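+		/*
+		 * Signal the collected fences outside of b->rb_lock;
+		 * dma_fence_signal() invokes the fence callbacks, and we
+		 * do not want to run those with interrupts disabled while
+		 * holding our lock.
+		 */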
+		if (!list_empty(&list)) {
+			local_bh_disable();
+			list_for_each_entry_safe(rq, n, &list, signaling.link) {
+				dma_fence_signal(&rq->fence);
+				GEM_BUG_ON(!i915_request_completed(rq));
+				i915_request_put(rq);
 			}
+			local_bh_enable(); /* kick start the tasklets */
 
-			/* If the engine is saturated we may be continually
+			/*
+			 * If the engine is saturated we may be continually
 			 * processing completed requests. This angers the
 			 * NMI watchdog if we never let anything else
 			 * have access to the CPU. Let's pretend to be nice
@@ -722,9 +675,19 @@ static int intel_breadcrumbs_signaler(void *arg)
 			 */
 			do_schedule = need_resched();
 		}
-		i915_request_put(request);
 
 		if (unlikely(do_schedule)) {
+			/* Before we sleep, check for a missed seqno */
+			if (current->state & TASK_NORMAL &&
+			    !list_empty(&b->signals) &&
+			    engine->irq_seqno_barrier &&
+			    test_and_clear_bit(ENGINE_IRQ_BREADCRUMB,
+					       &engine->irq_posted)) {
+				engine->irq_seqno_barrier(engine);
+				intel_engine_wakeup(engine);
+			}
+
+sleep:
 			if (kthread_should_park())
 				kthread_parkme();
 
@@ -739,13 +702,40 @@
 	return 0;
 }
 
+static void insert_signal(struct intel_breadcrumbs *b,
+			  struct i915_request *request,
+			  const u32 seqno)
+{
+	struct i915_request *iter;
+
+	lockdep_assert_held(&b->rb_lock);
+
+	/*
+	 * A reasonable assumption is that we are called to add signals
+	 * in sequence, as the requests are submitted for execution and
+	 * assigned a global_seqno. This will be the case for the majority
+	 * of internally generated signals (inter-engine signaling).
+	 *
+	 * Out of order waiters triggering random signaling enabling will
+	 * be more problematic, but hopefully rare enough and the list
+	 * small enough that the O(N) insertion sort is not an issue.
+	 */
+	list_for_each_entry_reverse(iter, &b->signals, signaling.link)
+		if (i915_seqno_passed(seqno, iter->signaling.wait.seqno))
+			break;
+
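+	/*
+	 * If the loop above completes without breaking, iter is left
+	 * pointing at the list head pun, so the list_add() below
+	 * degenerates to adding at the front, i.e. we become the oldest
+	 * signal.
+	 */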
+	list_add(&request->signaling.link, &iter->signaling.link);
+}
+
 void intel_engine_enable_signaling(struct i915_request *request, bool wakeup)
 {
 	struct intel_engine_cs *engine = request->engine;
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
 	u32 seqno;
 
-	/* Note that we may be called from an interrupt handler on another
+	/*
+	 * Note that we may be called from an interrupt handler on another
 	 * device (e.g. nouveau signaling a fence completion causing us
 	 * to submit a request, and so enable signaling). As such,
 	 * we need to make sure that all other users of b->rb_lock protect
@@ -757,17 +747,16 @@ void intel_engine_enable_signaling(struct i915_request *request, bool wakeup)
 	lockdep_assert_held(&request->lock);
 
 	seqno = i915_request_global_seqno(request);
-	if (!seqno)
+	if (!seqno) /* will be enabled later upon execution */
 		return;
 
-	spin_lock(&b->rb_lock);
-
 	GEM_BUG_ON(request->signaling.wait.seqno);
 	request->signaling.wait.tsk = b->signaler;
 	request->signaling.wait.request = request;
 	request->signaling.wait.seqno = seqno;
 
-	/* First add ourselves into the list of waiters, but register our
+	/*
+	 * Add ourselves into the list of waiters, registering our
 	 * bottom-half as the signaller thread. As per usual, only the oldest
 	 * waiter (not just signaller) is tasked as the bottom-half waking
 	 * up all completed waiters after the user interrupt.
@@ -775,39 +764,9 @@
 	 * If we are the oldest waiter, enable the irq (after which we
 	 * must double check that the seqno did not complete).
 	 */
+	spin_lock(&b->rb_lock);
+	insert_signal(b, request, seqno);
 	wakeup &= __intel_engine_add_wait(engine, &request->signaling.wait);
-
-	if (!__i915_request_completed(request, seqno)) {
-		struct rb_node *parent, **p;
-		bool first;
-
-		/* Now insert ourselves into the retirement ordered list of
-		 * signals on this engine. We track the oldest seqno as that
-		 * will be the first signal to complete.
-		 */
-		parent = NULL;
-		first = true;
-		p = &b->signals.rb_node;
-		while (*p) {
-			parent = *p;
-			if (i915_seqno_passed(seqno,
-					      to_signaler(parent)->signaling.wait.seqno)) {
-				p = &parent->rb_right;
-				first = false;
-			} else {
-				p = &parent->rb_left;
-			}
-		}
-		rb_link_node(&request->signaling.node, parent, p);
-		rb_insert_color(&request->signaling.node, &b->signals);
-		if (first)
-			rcu_assign_pointer(b->first_signal, request);
-	} else {
-		__intel_engine_remove_wait(engine, &request->signaling.wait);
-		request->signaling.wait.seqno = 0;
-		wakeup = false;
-	}
-
 	spin_unlock(&b->rb_lock);
 
 	if (wakeup)
@@ -816,17 +775,20 @@
 
 void intel_engine_cancel_signaling(struct i915_request *request)
 {
+	struct intel_engine_cs *engine = request->engine;
+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
 	GEM_BUG_ON(!irqs_disabled());
 	lockdep_assert_held(&request->lock);
 
-	if (READ_ONCE(request->signaling.wait.seqno)) {
-		struct intel_engine_cs *engine = request->engine;
-		struct intel_breadcrumbs *b = &engine->breadcrumbs;
+	if (!READ_ONCE(request->signaling.wait.seqno))
+		return;
 
-		spin_lock(&b->rb_lock);
-		__intel_engine_remove_signal(engine, request);
-		spin_unlock(&b->rb_lock);
-	}
+	spin_lock(&b->rb_lock);
+	__intel_engine_remove_wait(engine, &request->signaling.wait);
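+	/*
+	 * wait.seqno is only cleared under b->rb_lock, either here or by
+	 * the signaler when it unlinks the request, so fetch_and_zero()
+	 * tells us whether we still own signaling.link and must delete
+	 * it ourselves.
+	 */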
+	if (fetch_and_zero(&request->signaling.wait.seqno))
+		__list_del_entry(&request->signaling.link);
+	spin_unlock(&b->rb_lock);
 }
 
 int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
@@ -840,6 +802,8 @@ int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
 	timer_setup(&b->fake_irq, intel_breadcrumbs_fake_irq, 0);
 	timer_setup(&b->hangcheck, intel_breadcrumbs_hangcheck, 0);
 
+	INIT_LIST_HEAD(&b->signals);
+
 	/* Spawn a thread to provide a common bottom-half for all signals.
 	 * As this is an asynchronous interface we cannot steal the current
 	 * task for handling the bottom-half to the user interrupt, therefore
@@ -899,8 +863,7 @@ void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
 	/* The engines should be idle and all requests accounted for! */
 	WARN_ON(READ_ONCE(b->irq_wait));
 	WARN_ON(!RB_EMPTY_ROOT(&b->waiters));
-	WARN_ON(rcu_access_pointer(b->first_signal));
-	WARN_ON(!RB_EMPTY_ROOT(&b->signals));
+	WARN_ON(!list_empty(&b->signals));
 
 	if (!IS_ERR_OR_NULL(b->signaler))
 		kthread_stop(b->signaler);
@@ -913,20 +876,22 @@ bool intel_breadcrumbs_busy(struct intel_engine_cs *engine)
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
 	bool busy = false;
 
-	spin_lock_irq(&b->rb_lock);
-	if (b->irq_wait) {
-		wake_up_process(b->irq_wait->tsk);
-		busy = true;
-	}
+	spin_lock_irq(&b->irq_lock);
+
+	if (b->irq_wait) {
+		wake_up_process(b->irq_wait->tsk);
+		busy = true;
+	}
+
+	spin_unlock_irq(&b->irq_lock);
 
-	if (rcu_access_pointer(b->first_signal)) {
+	if (!busy && !list_empty(&b->signals)) {
 		wake_up_process(b->signaler);
 		busy = true;
 	}
 
-	spin_unlock_irq(&b->rb_lock);
-
 	return busy;
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 90e4380cbdd5e30fa18df20455cc5b0d97694320..e7526a4f05e5b953134f5b9f95263b8c4d8d571c 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -356,9 +356,9 @@ struct intel_engine_cs {
 		spinlock_t rb_lock; /* protects the rb and wraps irq_lock */
 		struct rb_root waiters; /* sorted by retirement, priority */
-		struct rb_root signals; /* sorted by retirement */
+		struct list_head signals; /* sorted by retirement */
 		struct task_struct *signaler; /* used for fence signalling */
-		struct i915_request __rcu *first_signal;
+
 		struct timer_list fake_irq; /* used after a missed interrupt */
 		struct timer_list hangcheck; /* detect missed interrupts */