diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index 75318ebb8d25bda319216e717fd13ef88117743f..6092f0ea24df0444c83c5d8c01e9d27c41975b44 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -33,6 +33,7 @@ i915-y += i915_cmd_parser.o \ i915_gem_gtt.o \ i915_gem.o \ i915_gem_render_state.o \ + i915_gem_request.o \ i915_gem_shrinker.o \ i915_gem_stolen.o \ i915_gem_tiling.o \ diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 27d9b2c374b370fbc8b0bda67f1ab220c337c3a0..c97a75597099e325385b8c2e65facff3ea03c360 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -61,6 +61,7 @@ #include "i915_gem.h" #include "i915_gem_gtt.h" #include "i915_gem_render_state.h" +#include "i915_gem_request.h" #include "intel_gvt.h" @@ -2365,171 +2366,6 @@ static inline struct scatterlist *__sg_next(struct scatterlist *sg) (((__iter).curr += PAGE_SIZE) < (__iter).max) || \ ((__iter) = __sgt_iter(__sg_next((__iter).sgp), false), 0)) -/** - * Request queue structure. - * - * The request queue allows us to note sequence numbers that have been emitted - * and may be associated with active buffers to be retired. - * - * By keeping this list, we can avoid having to do questionable sequence - * number comparisons on buffer last_read|write_seqno. It also allows an - * emission time to be associated with the request for tracking how far ahead - * of the GPU the submission is. - * - * The requests are reference counted, so upon creation they should have an - * initial reference taken using kref_init - */ -struct drm_i915_gem_request { - struct kref ref; - - /** On Which ring this request was generated */ - struct drm_i915_private *i915; - struct intel_engine_cs *engine; - struct intel_signal_node signaling; - - /** GEM sequence number associated with the previous request, - * when the HWS breadcrumb is equal to this the GPU is processing - * this request. - */ - u32 previous_seqno; - - /** GEM sequence number associated with this request, - * when the HWS breadcrumb is equal or greater than this the GPU - * has finished processing this request. - */ - u32 seqno; - - /** Position in the ringbuffer of the start of the request */ - u32 head; - - /** - * Position in the ringbuffer of the start of the postfix. - * This is required to calculate the maximum available ringbuffer - * space without overwriting the postfix. - */ - u32 postfix; - - /** Position in the ringbuffer of the end of the whole request */ - u32 tail; - - /** Preallocate space in the ringbuffer for the emitting the request */ - u32 reserved_space; - - /** - * Context and ring buffer related to this request - * Contexts are refcounted, so when this request is associated with a - * context, we must increment the context's refcount, to guarantee that - * it persists while any request is linked to it. Requests themselves - * are also refcounted, so the request will only be freed when the last - * reference to it is dismissed, and the code in - * i915_gem_request_free() will then decrement the refcount on the - * context. - */ - struct i915_gem_context *ctx; - struct intel_ringbuffer *ringbuf; - - /** - * Context related to the previous request. - * As the contexts are accessed by the hardware until the switch is - * completed to a new context, the hardware may still be writing - * to the context object after the breadcrumb is visible. We must - * not unpin/unbind/prune that object whilst still active and so - * we keep the previous context pinned until the following (this) - * request is retired. - */ - struct i915_gem_context *previous_context; - - /** Batch buffer related to this request if any (used for - error state dump only) */ - struct drm_i915_gem_object *batch_obj; - - /** Time at which this request was emitted, in jiffies. */ - unsigned long emitted_jiffies; - - /** global list entry for this request */ - struct list_head list; - - struct drm_i915_file_private *file_priv; - /** file_priv list entry for this request */ - struct list_head client_list; - - /** process identifier submitting this request */ - struct pid *pid; - - /** - * The ELSP only accepts two elements at a time, so we queue - * context/tail pairs on a given queue (ring->execlist_queue) until the - * hardware is available. The queue serves a double purpose: we also use - * it to keep track of the up to 2 contexts currently in the hardware - * (usually one in execution and the other queued up by the GPU): We - * only remove elements from the head of the queue when the hardware - * informs us that an element has been completed. - * - * All accesses to the queue are mediated by a spinlock - * (ring->execlist_lock). - */ - - /** Execlist link in the submission queue.*/ - struct list_head execlist_link; - - /** Execlists no. of times this request has been sent to the ELSP */ - int elsp_submitted; - - /** Execlists context hardware id. */ - unsigned ctx_hw_id; -}; - -struct drm_i915_gem_request * __must_check -i915_gem_request_alloc(struct intel_engine_cs *engine, - struct i915_gem_context *ctx); -void i915_gem_request_free(struct kref *req_ref); -int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, - struct drm_file *file); - -static inline uint32_t -i915_gem_request_get_seqno(struct drm_i915_gem_request *req) -{ - return req ? req->seqno : 0; -} - -static inline struct intel_engine_cs * -i915_gem_request_get_engine(struct drm_i915_gem_request *req) -{ - return req ? req->engine : NULL; -} - -static inline struct drm_i915_gem_request * -i915_gem_request_reference(struct drm_i915_gem_request *req) -{ - if (req) - kref_get(&req->ref); - return req; -} - -static inline void -i915_gem_request_unreference(struct drm_i915_gem_request *req) -{ - kref_put(&req->ref, i915_gem_request_free); -} - -static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst, - struct drm_i915_gem_request *src) -{ - if (src) - i915_gem_request_reference(src); - - if (*pdst) - i915_gem_request_unreference(*pdst); - - *pdst = src; -} - -/* - * XXX: i915_gem_request_completed should be here but currently needs the - * definition of i915_seqno_passed() which is below. It will be moved in - * a later patch when the call to i915_seqno_passed() is obsoleted... - */ - /* * A command that requires special handling by the command parser. */ @@ -3297,37 +3133,6 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old, struct drm_i915_gem_object *new, unsigned frontbuffer_bits); -/** - * Returns true if seq1 is later than seq2. - */ -static inline bool -i915_seqno_passed(uint32_t seq1, uint32_t seq2) -{ - return (int32_t)(seq1 - seq2) >= 0; -} - -static inline bool i915_gem_request_started(const struct drm_i915_gem_request *req) -{ - return i915_seqno_passed(intel_engine_get_seqno(req->engine), - req->previous_seqno); -} - -static inline bool i915_gem_request_completed(const struct drm_i915_gem_request *req) -{ - return i915_seqno_passed(intel_engine_get_seqno(req->engine), - req->seqno); -} - -bool __i915_spin_request(const struct drm_i915_gem_request *request, - int state, unsigned long timeout_us); -static inline bool i915_spin_request(const struct drm_i915_gem_request *request, - int state, unsigned long timeout_us) -{ - return (i915_gem_request_started(request) && - __i915_spin_request(request, state, timeout_us)); -} - -int __must_check i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno); int __must_check i915_gem_set_seqno(struct drm_device *dev, u32 seqno); struct drm_i915_gem_request * @@ -3385,18 +3190,6 @@ void i915_gem_cleanup_engines(struct drm_device *dev); int __must_check i915_gem_wait_for_idle(struct drm_i915_private *dev_priv); int __must_check i915_gem_suspend(struct drm_device *dev); void i915_gem_resume(struct drm_device *dev); -void __i915_add_request(struct drm_i915_gem_request *req, - struct drm_i915_gem_object *batch_obj, - bool flush_caches); -#define i915_add_request(req) \ - __i915_add_request(req, NULL, true) -#define i915_add_request_no_flush(req) \ - __i915_add_request(req, NULL, false) -int __i915_wait_request(struct drm_i915_gem_request *req, - bool interruptible, - s64 *timeout, - struct intel_rps_client *rps); -int __must_check i915_wait_request(struct drm_i915_gem_request *req); int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf); int __must_check i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e40fab114bf010bd0840d7594faf7a86a39e6cbf..6df14058b3fedbaa7a498e5b4d43625c12dfebf9 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1325,365 +1325,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, return ret; } -static int -i915_gem_check_wedge(unsigned reset_counter, bool interruptible) -{ - if (__i915_terminally_wedged(reset_counter)) - return -EIO; - - if (__i915_reset_in_progress(reset_counter)) { - /* Non-interruptible callers can't handle -EAGAIN, hence return - * -EIO unconditionally for these. */ - if (!interruptible) - return -EIO; - - return -EAGAIN; - } - - return 0; -} - -static unsigned long local_clock_us(unsigned *cpu) -{ - unsigned long t; - - /* Cheaply and approximately convert from nanoseconds to microseconds. - * The result and subsequent calculations are also defined in the same - * approximate microseconds units. The principal source of timing - * error here is from the simple truncation. - * - * Note that local_clock() is only defined wrt to the current CPU; - * the comparisons are no longer valid if we switch CPUs. Instead of - * blocking preemption for the entire busywait, we can detect the CPU - * switch and use that as indicator of system load and a reason to - * stop busywaiting, see busywait_stop(). - */ - *cpu = get_cpu(); - t = local_clock() >> 10; - put_cpu(); - - return t; -} - -static bool busywait_stop(unsigned long timeout, unsigned cpu) -{ - unsigned this_cpu; - - if (time_after(local_clock_us(&this_cpu), timeout)) - return true; - - return this_cpu != cpu; -} - -bool __i915_spin_request(const struct drm_i915_gem_request *req, - int state, unsigned long timeout_us) -{ - unsigned cpu; - - /* When waiting for high frequency requests, e.g. during synchronous - * rendering split between the CPU and GPU, the finite amount of time - * required to set up the irq and wait upon it limits the response - * rate. By busywaiting on the request completion for a short while we - * can service the high frequency waits as quick as possible. However, - * if it is a slow request, we want to sleep as quickly as possible. - * The tradeoff between waiting and sleeping is roughly the time it - * takes to sleep on a request, on the order of a microsecond. - */ - - timeout_us += local_clock_us(&cpu); - do { - if (i915_gem_request_completed(req)) - return true; - - if (signal_pending_state(state, current)) - break; - - if (busywait_stop(timeout_us, cpu)) - break; - - cpu_relax_lowlatency(); - } while (!need_resched()); - - return false; -} - -/** - * __i915_wait_request - wait until execution of request has finished - * @req: duh! - * @interruptible: do an interruptible wait (normally yes) - * @timeout: in - how long to wait (NULL forever); out - how much time remaining - * @rps: RPS client - * - * Note: It is of utmost importance that the passed in seqno and reset_counter - * values have been read by the caller in an smp safe manner. Where read-side - * locks are involved, it is sufficient to read the reset_counter before - * unlocking the lock that protects the seqno. For lockless tricks, the - * reset_counter _must_ be read before, and an appropriate smp_rmb must be - * inserted. - * - * Returns 0 if the request was found within the alloted time. Else returns the - * errno with remaining time filled in timeout argument. - */ -int __i915_wait_request(struct drm_i915_gem_request *req, - bool interruptible, - s64 *timeout, - struct intel_rps_client *rps) -{ - int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; - DEFINE_WAIT(reset); - struct intel_wait wait; - unsigned long timeout_remain; - s64 before = 0; /* Only to silence a compiler warning. */ - int ret = 0; - - might_sleep(); - - if (list_empty(&req->list)) - return 0; - - if (i915_gem_request_completed(req)) - return 0; - - timeout_remain = MAX_SCHEDULE_TIMEOUT; - if (timeout) { - if (WARN_ON(*timeout < 0)) - return -EINVAL; - - if (*timeout == 0) - return -ETIME; - - timeout_remain = nsecs_to_jiffies_timeout(*timeout); - - /* - * Record current time in case interrupted by signal, or wedged. - */ - before = ktime_get_raw_ns(); - } - - trace_i915_gem_request_wait_begin(req); - - /* This client is about to stall waiting for the GPU. In many cases - * this is undesirable and limits the throughput of the system, as - * many clients cannot continue processing user input/output whilst - * blocked. RPS autotuning may take tens of milliseconds to respond - * to the GPU load and thus incurs additional latency for the client. - * We can circumvent that by promoting the GPU frequency to maximum - * before we wait. This makes the GPU throttle up much more quickly - * (good for benchmarks and user experience, e.g. window animations), - * but at a cost of spending more power processing the workload - * (bad for battery). Not all clients even want their results - * immediately and for them we should just let the GPU select its own - * frequency to maximise efficiency. To prevent a single client from - * forcing the clocks too high for the whole system, we only allow - * each client to waitboost once in a busy period. - */ - if (INTEL_INFO(req->i915)->gen >= 6) - gen6_rps_boost(req->i915, rps, req->emitted_jiffies); - - /* Optimistic spin for the next ~jiffie before touching IRQs */ - if (i915_spin_request(req, state, 5)) - goto complete; - - set_current_state(state); - add_wait_queue(&req->i915->gpu_error.wait_queue, &reset); - - intel_wait_init(&wait, req->seqno); - if (intel_engine_add_wait(req->engine, &wait)) - /* In order to check that we haven't missed the interrupt - * as we enabled it, we need to kick ourselves to do a - * coherent check on the seqno before we sleep. - */ - goto wakeup; - - for (;;) { - if (signal_pending_state(state, current)) { - ret = -ERESTARTSYS; - break; - } - - timeout_remain = io_schedule_timeout(timeout_remain); - if (timeout_remain == 0) { - ret = -ETIME; - break; - } - - if (intel_wait_complete(&wait)) - break; - - set_current_state(state); - -wakeup: - /* Carefully check if the request is complete, giving time - * for the seqno to be visible following the interrupt. - * We also have to check in case we are kicked by the GPU - * reset in order to drop the struct_mutex. - */ - if (__i915_request_irq_complete(req)) - break; - - /* Only spin if we know the GPU is processing this request */ - if (i915_spin_request(req, state, 2)) - break; - } - remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset); - - intel_engine_remove_wait(req->engine, &wait); - __set_current_state(TASK_RUNNING); -complete: - trace_i915_gem_request_wait_end(req); - - if (timeout) { - s64 tres = *timeout - (ktime_get_raw_ns() - before); - - *timeout = tres < 0 ? 0 : tres; - - /* - * Apparently ktime isn't accurate enough and occasionally has a - * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch - * things up to make the test happy. We allow up to 1 jiffy. - * - * This is a regrssion from the timespec->ktime conversion. - */ - if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000) - *timeout = 0; - } - - if (rps && req->seqno == req->engine->last_submitted_seqno) { - /* The GPU is now idle and this client has stalled. - * Since no other client has submitted a request in the - * meantime, assume that this client is the only one - * supplying work to the GPU but is unable to keep that - * work supplied because it is waiting. Since the GPU is - * then never kept fully busy, RPS autoclocking will - * keep the clocks relatively low, causing further delays. - * Compensate by giving the synchronous client credit for - * a waitboost next time. - */ - spin_lock(&req->i915->rps.client_lock); - list_del_init(&rps->link); - spin_unlock(&req->i915->rps.client_lock); - } - - return ret; -} - -int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, - struct drm_file *file) -{ - struct drm_i915_file_private *file_priv; - - WARN_ON(!req || !file || req->file_priv); - - if (!req || !file) - return -EINVAL; - - if (req->file_priv) - return -EINVAL; - - file_priv = file->driver_priv; - - spin_lock(&file_priv->mm.lock); - req->file_priv = file_priv; - list_add_tail(&req->client_list, &file_priv->mm.request_list); - spin_unlock(&file_priv->mm.lock); - - req->pid = get_pid(task_pid(current)); - - return 0; -} - -static inline void -i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) -{ - struct drm_i915_file_private *file_priv = request->file_priv; - - if (!file_priv) - return; - - spin_lock(&file_priv->mm.lock); - list_del(&request->client_list); - request->file_priv = NULL; - spin_unlock(&file_priv->mm.lock); - - put_pid(request->pid); - request->pid = NULL; -} - -static void i915_gem_request_retire(struct drm_i915_gem_request *request) -{ - trace_i915_gem_request_retire(request); - - /* We know the GPU must have read the request to have - * sent us the seqno + interrupt, so use the position - * of tail of the request to update the last known position - * of the GPU head. - * - * Note this requires that we are always called in request - * completion order. - */ - request->ringbuf->last_retired_head = request->postfix; - - list_del_init(&request->list); - i915_gem_request_remove_from_client(request); - - if (request->previous_context) { - if (i915.enable_execlists) - intel_lr_context_unpin(request->previous_context, - request->engine); - } - - i915_gem_context_unreference(request->ctx); - i915_gem_request_unreference(request); -} - -static void -__i915_gem_request_retire__upto(struct drm_i915_gem_request *req) -{ - struct intel_engine_cs *engine = req->engine; - struct drm_i915_gem_request *tmp; - - lockdep_assert_held(&engine->i915->drm.struct_mutex); - - if (list_empty(&req->list)) - return; - - do { - tmp = list_first_entry(&engine->request_list, - typeof(*tmp), list); - - i915_gem_request_retire(tmp); - } while (tmp != req); - - WARN_ON(i915_verify_lists(engine->dev)); -} - -/** - * Waits for a request to be signaled, and cleans up the - * request and object lists appropriately for that event. - * @req: request to wait on - */ -int -i915_wait_request(struct drm_i915_gem_request *req) -{ - struct drm_i915_private *dev_priv = req->i915; - bool interruptible; - int ret; - - interruptible = dev_priv->mm.interruptible; - - BUG_ON(!mutex_is_locked(&dev_priv->drm.struct_mutex)); - - ret = __i915_wait_request(req, interruptible, NULL, NULL); - if (ret) - return ret; - - /* If the GPU hung, we want to keep the requests to find the guilty. */ - if (!i915_reset_in_progress(&dev_priv->gpu_error)) - __i915_gem_request_retire__upto(req); - - return 0; -} - /** * Ensures that all rendering to the object has completed and the object is * safe to unbind from the GTT or access from the CPU. @@ -1740,7 +1381,7 @@ i915_gem_object_retire_request(struct drm_i915_gem_object *obj, i915_gem_object_retire__write(obj); if (!i915_reset_in_progress(&req->i915->gpu_error)) - __i915_gem_request_retire__upto(req); + i915_gem_request_retire_upto(req); } /* A nonblocking variant of the above wait. This is a highly dangerous routine @@ -2761,193 +2402,6 @@ i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring) drm_gem_object_unreference(&obj->base); } -static int -i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno) -{ - struct intel_engine_cs *engine; - int ret; - - /* Carefully retire all requests without writing to the rings */ - for_each_engine(engine, dev_priv) { - ret = intel_engine_idle(engine); - if (ret) - return ret; - } - i915_gem_retire_requests(dev_priv); - - /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ - if (!i915_seqno_passed(seqno, dev_priv->next_seqno)) { - while (intel_kick_waiters(dev_priv) || - intel_kick_signalers(dev_priv)) - yield(); - } - - /* Finally reset hw state */ - for_each_engine(engine, dev_priv) - intel_ring_init_seqno(engine, seqno); - - return 0; -} - -int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) -{ - struct drm_i915_private *dev_priv = to_i915(dev); - int ret; - - if (seqno == 0) - return -EINVAL; - - /* HWS page needs to be set less than what we - * will inject to ring - */ - ret = i915_gem_init_seqno(dev_priv, seqno - 1); - if (ret) - return ret; - - /* Carefully set the last_seqno value so that wrap - * detection still works - */ - dev_priv->next_seqno = seqno; - dev_priv->last_seqno = seqno - 1; - if (dev_priv->last_seqno == 0) - dev_priv->last_seqno--; - - return 0; -} - -int -i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno) -{ - /* reserve 0 for non-seqno */ - if (dev_priv->next_seqno == 0) { - int ret = i915_gem_init_seqno(dev_priv, 0); - if (ret) - return ret; - - dev_priv->next_seqno = 1; - } - - *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; - return 0; -} - -static void i915_gem_mark_busy(const struct intel_engine_cs *engine) -{ - struct drm_i915_private *dev_priv = engine->i915; - - dev_priv->gt.active_engines |= intel_engine_flag(engine); - if (dev_priv->gt.awake) - return; - - intel_runtime_pm_get_noresume(dev_priv); - dev_priv->gt.awake = true; - - intel_enable_gt_powersave(dev_priv); - i915_update_gfx_val(dev_priv); - if (INTEL_GEN(dev_priv) >= 6) - gen6_rps_busy(dev_priv); - - queue_delayed_work(dev_priv->wq, - &dev_priv->gt.retire_work, - round_jiffies_up_relative(HZ)); -} - -/* - * NB: This function is not allowed to fail. Doing so would mean the the - * request is not being tracked for completion but the work itself is - * going to happen on the hardware. This would be a Bad Thing(tm). - */ -void __i915_add_request(struct drm_i915_gem_request *request, - struct drm_i915_gem_object *obj, - bool flush_caches) -{ - struct intel_engine_cs *engine; - struct intel_ringbuffer *ringbuf; - u32 request_start; - u32 reserved_tail; - int ret; - - if (WARN_ON(request == NULL)) - return; - - engine = request->engine; - ringbuf = request->ringbuf; - - /* - * To ensure that this call will not fail, space for its emissions - * should already have been reserved in the ring buffer. Let the ring - * know that it is time to use that space up. - */ - request_start = intel_ring_get_tail(ringbuf); - reserved_tail = request->reserved_space; - request->reserved_space = 0; - - /* - * Emit any outstanding flushes - execbuf can fail to emit the flush - * after having emitted the batchbuffer command. Hence we need to fix - * things up similar to emitting the lazy request. The difference here - * is that the flush _must_ happen before the next request, no matter - * what. - */ - if (flush_caches) { - if (i915.enable_execlists) - ret = logical_ring_flush_all_caches(request); - else - ret = intel_ring_flush_all_caches(request); - /* Not allowed to fail! */ - WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret); - } - - trace_i915_gem_request_add(request); - - request->head = request_start; - - /* Whilst this request exists, batch_obj will be on the - * active_list, and so will hold the active reference. Only when this - * request is retired will the the batch_obj be moved onto the - * inactive_list and lose its active reference. Hence we do not need - * to explicitly hold another reference here. - */ - request->batch_obj = obj; - - /* Seal the request and mark it as pending execution. Note that - * we may inspect this state, without holding any locks, during - * hangcheck. Hence we apply the barrier to ensure that we do not - * see a more recent value in the hws than we are tracking. - */ - request->emitted_jiffies = jiffies; - request->previous_seqno = engine->last_submitted_seqno; - smp_store_mb(engine->last_submitted_seqno, request->seqno); - list_add_tail(&request->list, &engine->request_list); - - /* Record the position of the start of the request so that - * should we detect the updated seqno part-way through the - * GPU processing the request, we never over-estimate the - * position of the head. - */ - request->postfix = intel_ring_get_tail(ringbuf); - - if (i915.enable_execlists) - ret = engine->emit_request(request); - else { - ret = engine->add_request(request); - - request->tail = intel_ring_get_tail(ringbuf); - } - /* Not allowed to fail! */ - WARN(ret, "emit|add_request failed: %d!\n", ret); - /* Sanity check that the reserved size was large enough. */ - ret = intel_ring_get_tail(ringbuf) - request_start; - if (ret < 0) - ret += ringbuf->size; - WARN_ONCE(ret > reserved_tail, - "Not enough space reserved (%d bytes) " - "for adding the request (%d bytes)\n", - reserved_tail, ret); - - i915_gem_mark_busy(engine); -} - static bool i915_context_is_banned(const struct i915_gem_context *ctx) { unsigned long elapsed; @@ -2979,101 +2433,6 @@ static void i915_set_reset_status(struct i915_gem_context *ctx, } } -void i915_gem_request_free(struct kref *req_ref) -{ - struct drm_i915_gem_request *req = container_of(req_ref, - typeof(*req), ref); - kmem_cache_free(req->i915->requests, req); -} - -static inline int -__i915_gem_request_alloc(struct intel_engine_cs *engine, - struct i915_gem_context *ctx, - struct drm_i915_gem_request **req_out) -{ - struct drm_i915_private *dev_priv = engine->i915; - unsigned reset_counter = i915_reset_counter(&dev_priv->gpu_error); - struct drm_i915_gem_request *req; - int ret; - - if (!req_out) - return -EINVAL; - - *req_out = NULL; - - /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report - * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex - * and restart. - */ - ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible); - if (ret) - return ret; - - req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL); - if (req == NULL) - return -ENOMEM; - - ret = i915_gem_get_seqno(engine->i915, &req->seqno); - if (ret) - goto err; - - kref_init(&req->ref); - req->i915 = dev_priv; - req->engine = engine; - req->ctx = ctx; - i915_gem_context_reference(req->ctx); - - /* - * Reserve space in the ring buffer for all the commands required to - * eventually emit this request. This is to guarantee that the - * i915_add_request() call can't fail. Note that the reserve may need - * to be redone if the request is not actually submitted straight - * away, e.g. because a GPU scheduler has deferred it. - */ - req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST; - - if (i915.enable_execlists) - ret = intel_logical_ring_alloc_request_extras(req); - else - ret = intel_ring_alloc_request_extras(req); - if (ret) - goto err_ctx; - - *req_out = req; - return 0; - -err_ctx: - i915_gem_context_unreference(ctx); -err: - kmem_cache_free(dev_priv->requests, req); - return ret; -} - -/** - * i915_gem_request_alloc - allocate a request structure - * - * @engine: engine that we wish to issue the request on. - * @ctx: context that the request will be associated with. - * This can be NULL if the request is not directly related to - * any specific user context, in which case this function will - * choose an appropriate context to use. - * - * Returns a pointer to the allocated request if successful, - * or an error code if not. - */ -struct drm_i915_gem_request * -i915_gem_request_alloc(struct intel_engine_cs *engine, - struct i915_gem_context *ctx) -{ - struct drm_i915_gem_request *req; - int err; - - if (ctx == NULL) - ctx = engine->i915->kernel_context; - err = __i915_gem_request_alloc(engine, ctx, &req); - return err ? ERR_PTR(err) : req; -} - struct drm_i915_gem_request * i915_gem_find_active_request(struct intel_engine_cs *engine) { @@ -3147,14 +2506,14 @@ static void i915_gem_reset_engine_cleanup(struct intel_engine_cs *engine) * implicit references on things like e.g. ppgtt address spaces through * the request. */ - while (!list_empty(&engine->request_list)) { + if (!list_empty(&engine->request_list)) { struct drm_i915_gem_request *request; - request = list_first_entry(&engine->request_list, - struct drm_i915_gem_request, - list); + request = list_last_entry(&engine->request_list, + struct drm_i915_gem_request, + list); - i915_gem_request_retire(request); + i915_gem_request_retire_upto(request); } /* Having flushed all requests from all queues, we know that all @@ -3222,7 +2581,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *engine) if (!i915_gem_request_completed(request)) break; - i915_gem_request_retire(request); + i915_gem_request_retire_upto(request); } /* Move any buffers on the active list that are no longer referenced diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c new file mode 100644 index 0000000000000000000000000000000000000000..9e9aa6b725f75940a3fd8079657c90874ab70e78 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -0,0 +1,658 @@ +/* + * Copyright © 2008-2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#include "i915_drv.h" + +int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, + struct drm_file *file) +{ + struct drm_i915_private *dev_private; + struct drm_i915_file_private *file_priv; + + WARN_ON(!req || !file || req->file_priv); + + if (!req || !file) + return -EINVAL; + + if (req->file_priv) + return -EINVAL; + + dev_private = req->i915; + file_priv = file->driver_priv; + + spin_lock(&file_priv->mm.lock); + req->file_priv = file_priv; + list_add_tail(&req->client_list, &file_priv->mm.request_list); + spin_unlock(&file_priv->mm.lock); + + req->pid = get_pid(task_pid(current)); + + return 0; +} + +static inline void +i915_gem_request_remove_from_client(struct drm_i915_gem_request *request) +{ + struct drm_i915_file_private *file_priv = request->file_priv; + + if (!file_priv) + return; + + spin_lock(&file_priv->mm.lock); + list_del(&request->client_list); + request->file_priv = NULL; + spin_unlock(&file_priv->mm.lock); + + put_pid(request->pid); + request->pid = NULL; +} + +static void i915_gem_request_retire(struct drm_i915_gem_request *request) +{ + trace_i915_gem_request_retire(request); + list_del_init(&request->list); + + /* We know the GPU must have read the request to have + * sent us the seqno + interrupt, so use the position + * of tail of the request to update the last known position + * of the GPU head. + * + * Note this requires that we are always called in request + * completion order. + */ + request->ringbuf->last_retired_head = request->postfix; + + i915_gem_request_remove_from_client(request); + + if (request->previous_context) { + if (i915.enable_execlists) + intel_lr_context_unpin(request->previous_context, + request->engine); + } + + i915_gem_context_unreference(request->ctx); + i915_gem_request_unreference(request); +} + +void i915_gem_request_retire_upto(struct drm_i915_gem_request *req) +{ + struct intel_engine_cs *engine = req->engine; + struct drm_i915_gem_request *tmp; + + lockdep_assert_held(&req->i915->drm.struct_mutex); + + if (list_empty(&req->list)) + return; + + do { + tmp = list_first_entry(&engine->request_list, + typeof(*tmp), list); + + i915_gem_request_retire(tmp); + } while (tmp != req); + + WARN_ON(i915_verify_lists(engine->dev)); +} + +static int i915_gem_check_wedge(unsigned int reset_counter, bool interruptible) +{ + if (__i915_terminally_wedged(reset_counter)) + return -EIO; + + if (__i915_reset_in_progress(reset_counter)) { + /* Non-interruptible callers can't handle -EAGAIN, hence return + * -EIO unconditionally for these. + */ + if (!interruptible) + return -EIO; + + return -EAGAIN; + } + + return 0; +} + +static int i915_gem_init_seqno(struct drm_i915_private *dev_priv, u32 seqno) +{ + struct intel_engine_cs *engine; + int ret; + + /* Carefully retire all requests without writing to the rings */ + for_each_engine(engine, dev_priv) { + ret = intel_engine_idle(engine); + if (ret) + return ret; + } + i915_gem_retire_requests(dev_priv); + + /* If the seqno wraps around, we need to clear the breadcrumb rbtree */ + if (!i915_seqno_passed(seqno, dev_priv->next_seqno)) { + while (intel_kick_waiters(dev_priv) || + intel_kick_signalers(dev_priv)) + yield(); + } + + /* Finally reset hw state */ + for_each_engine(engine, dev_priv) + intel_ring_init_seqno(engine, seqno); + + return 0; +} + +int i915_gem_set_seqno(struct drm_device *dev, u32 seqno) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + int ret; + + if (seqno == 0) + return -EINVAL; + + /* HWS page needs to be set less than what we + * will inject to ring + */ + ret = i915_gem_init_seqno(dev_priv, seqno - 1); + if (ret) + return ret; + + /* Carefully set the last_seqno value so that wrap + * detection still works + */ + dev_priv->next_seqno = seqno; + dev_priv->last_seqno = seqno - 1; + if (dev_priv->last_seqno == 0) + dev_priv->last_seqno--; + + return 0; +} + +static int i915_gem_get_seqno(struct drm_i915_private *dev_priv, u32 *seqno) +{ + /* reserve 0 for non-seqno */ + if (unlikely(dev_priv->next_seqno == 0)) { + int ret; + + ret = i915_gem_init_seqno(dev_priv, 0); + if (ret) + return ret; + + dev_priv->next_seqno = 1; + } + + *seqno = dev_priv->last_seqno = dev_priv->next_seqno++; + return 0; +} + +static inline int +__i915_gem_request_alloc(struct intel_engine_cs *engine, + struct i915_gem_context *ctx, + struct drm_i915_gem_request **req_out) +{ + struct drm_i915_private *dev_priv = engine->i915; + unsigned int reset_counter = i915_reset_counter(&dev_priv->gpu_error); + struct drm_i915_gem_request *req; + int ret; + + if (!req_out) + return -EINVAL; + + *req_out = NULL; + + /* ABI: Before userspace accesses the GPU (e.g. execbuffer), report + * EIO if the GPU is already wedged, or EAGAIN to drop the struct_mutex + * and restart. + */ + ret = i915_gem_check_wedge(reset_counter, dev_priv->mm.interruptible); + if (ret) + return ret; + + req = kmem_cache_zalloc(dev_priv->requests, GFP_KERNEL); + if (!req) + return -ENOMEM; + + ret = i915_gem_get_seqno(dev_priv, &req->seqno); + if (ret) + goto err; + + kref_init(&req->ref); + req->i915 = dev_priv; + req->engine = engine; + req->ctx = ctx; + i915_gem_context_reference(ctx); + + /* + * Reserve space in the ring buffer for all the commands required to + * eventually emit this request. This is to guarantee that the + * i915_add_request() call can't fail. Note that the reserve may need + * to be redone if the request is not actually submitted straight + * away, e.g. because a GPU scheduler has deferred it. + */ + req->reserved_space = MIN_SPACE_FOR_ADD_REQUEST; + + if (i915.enable_execlists) + ret = intel_logical_ring_alloc_request_extras(req); + else + ret = intel_ring_alloc_request_extras(req); + if (ret) + goto err_ctx; + + *req_out = req; + return 0; + +err_ctx: + i915_gem_context_unreference(ctx); +err: + kmem_cache_free(dev_priv->requests, req); + return ret; +} + +/** + * i915_gem_request_alloc - allocate a request structure + * + * @engine: engine that we wish to issue the request on. + * @ctx: context that the request will be associated with. + * This can be NULL if the request is not directly related to + * any specific user context, in which case this function will + * choose an appropriate context to use. + * + * Returns a pointer to the allocated request if successful, + * or an error code if not. + */ +struct drm_i915_gem_request * +i915_gem_request_alloc(struct intel_engine_cs *engine, + struct i915_gem_context *ctx) +{ + struct drm_i915_gem_request *req; + int err; + + if (!ctx) + ctx = engine->i915->kernel_context; + err = __i915_gem_request_alloc(engine, ctx, &req); + return err ? ERR_PTR(err) : req; +} + +static void i915_gem_mark_busy(const struct intel_engine_cs *engine) +{ + struct drm_i915_private *dev_priv = engine->i915; + + dev_priv->gt.active_engines |= intel_engine_flag(engine); + if (dev_priv->gt.awake) + return; + + intel_runtime_pm_get_noresume(dev_priv); + dev_priv->gt.awake = true; + + intel_enable_gt_powersave(dev_priv); + i915_update_gfx_val(dev_priv); + if (INTEL_GEN(dev_priv) >= 6) + gen6_rps_busy(dev_priv); + + queue_delayed_work(dev_priv->wq, + &dev_priv->gt.retire_work, + round_jiffies_up_relative(HZ)); +} + +/* + * NB: This function is not allowed to fail. Doing so would mean the the + * request is not being tracked for completion but the work itself is + * going to happen on the hardware. This would be a Bad Thing(tm). + */ +void __i915_add_request(struct drm_i915_gem_request *request, + struct drm_i915_gem_object *obj, + bool flush_caches) +{ + struct intel_engine_cs *engine; + struct intel_ringbuffer *ringbuf; + u32 request_start; + u32 reserved_tail; + int ret; + + if (WARN_ON(!request)) + return; + + engine = request->engine; + ringbuf = request->ringbuf; + + /* + * To ensure that this call will not fail, space for its emissions + * should already have been reserved in the ring buffer. Let the ring + * know that it is time to use that space up. + */ + request_start = intel_ring_get_tail(ringbuf); + reserved_tail = request->reserved_space; + request->reserved_space = 0; + + /* + * Emit any outstanding flushes - execbuf can fail to emit the flush + * after having emitted the batchbuffer command. Hence we need to fix + * things up similar to emitting the lazy request. The difference here + * is that the flush _must_ happen before the next request, no matter + * what. + */ + if (flush_caches) { + if (i915.enable_execlists) + ret = logical_ring_flush_all_caches(request); + else + ret = intel_ring_flush_all_caches(request); + /* Not allowed to fail! */ + WARN(ret, "*_ring_flush_all_caches failed: %d!\n", ret); + } + + trace_i915_gem_request_add(request); + + request->head = request_start; + + /* Whilst this request exists, batch_obj will be on the + * active_list, and so will hold the active reference. Only when this + * request is retired will the the batch_obj be moved onto the + * inactive_list and lose its active reference. Hence we do not need + * to explicitly hold another reference here. + */ + request->batch_obj = obj; + + /* Seal the request and mark it as pending execution. Note that + * we may inspect this state, without holding any locks, during + * hangcheck. Hence we apply the barrier to ensure that we do not + * see a more recent value in the hws than we are tracking. + */ + request->emitted_jiffies = jiffies; + request->previous_seqno = engine->last_submitted_seqno; + smp_store_mb(engine->last_submitted_seqno, request->seqno); + list_add_tail(&request->list, &engine->request_list); + + /* Record the position of the start of the request so that + * should we detect the updated seqno part-way through the + * GPU processing the request, we never over-estimate the + * position of the head. + */ + request->postfix = intel_ring_get_tail(ringbuf); + + if (i915.enable_execlists) { + ret = engine->emit_request(request); + } else { + ret = engine->add_request(request); + + request->tail = intel_ring_get_tail(ringbuf); + } + /* Not allowed to fail! */ + WARN(ret, "emit|add_request failed: %d!\n", ret); + /* Sanity check that the reserved size was large enough. */ + ret = intel_ring_get_tail(ringbuf) - request_start; + if (ret < 0) + ret += ringbuf->size; + WARN_ONCE(ret > reserved_tail, + "Not enough space reserved (%d bytes) " + "for adding the request (%d bytes)\n", + reserved_tail, ret); + + i915_gem_mark_busy(engine); +} + +static unsigned long local_clock_us(unsigned int *cpu) +{ + unsigned long t; + + /* Cheaply and approximately convert from nanoseconds to microseconds. + * The result and subsequent calculations are also defined in the same + * approximate microseconds units. The principal source of timing + * error here is from the simple truncation. + * + * Note that local_clock() is only defined wrt to the current CPU; + * the comparisons are no longer valid if we switch CPUs. Instead of + * blocking preemption for the entire busywait, we can detect the CPU + * switch and use that as indicator of system load and a reason to + * stop busywaiting, see busywait_stop(). + */ + *cpu = get_cpu(); + t = local_clock() >> 10; + put_cpu(); + + return t; +} + +static bool busywait_stop(unsigned long timeout, unsigned int cpu) +{ + unsigned int this_cpu; + + if (time_after(local_clock_us(&this_cpu), timeout)) + return true; + + return this_cpu != cpu; +} + +bool __i915_spin_request(const struct drm_i915_gem_request *req, + int state, unsigned long timeout_us) +{ + unsigned int cpu; + + /* When waiting for high frequency requests, e.g. during synchronous + * rendering split between the CPU and GPU, the finite amount of time + * required to set up the irq and wait upon it limits the response + * rate. By busywaiting on the request completion for a short while we + * can service the high frequency waits as quick as possible. However, + * if it is a slow request, we want to sleep as quickly as possible. + * The tradeoff between waiting and sleeping is roughly the time it + * takes to sleep on a request, on the order of a microsecond. + */ + + timeout_us += local_clock_us(&cpu); + do { + if (i915_gem_request_completed(req)) + return true; + + if (signal_pending_state(state, current)) + break; + + if (busywait_stop(timeout_us, cpu)) + break; + + cpu_relax_lowlatency(); + } while (!need_resched()); + + return false; +} + +/** + * __i915_wait_request - wait until execution of request has finished + * @req: duh! + * @interruptible: do an interruptible wait (normally yes) + * @timeout: in - how long to wait (NULL forever); out - how much time remaining + * @rps: client to charge for RPS boosting + * + * Note: It is of utmost importance that the passed in seqno and reset_counter + * values have been read by the caller in an smp safe manner. Where read-side + * locks are involved, it is sufficient to read the reset_counter before + * unlocking the lock that protects the seqno. For lockless tricks, the + * reset_counter _must_ be read before, and an appropriate smp_rmb must be + * inserted. + * + * Returns 0 if the request was found within the alloted time. Else returns the + * errno with remaining time filled in timeout argument. + */ +int __i915_wait_request(struct drm_i915_gem_request *req, + bool interruptible, + s64 *timeout, + struct intel_rps_client *rps) +{ + int state = interruptible ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE; + DEFINE_WAIT(reset); + struct intel_wait wait; + unsigned long timeout_remain; + int ret = 0; + + might_sleep(); + + if (list_empty(&req->list)) + return 0; + + if (i915_gem_request_completed(req)) + return 0; + + timeout_remain = MAX_SCHEDULE_TIMEOUT; + if (timeout) { + if (WARN_ON(*timeout < 0)) + return -EINVAL; + + if (*timeout == 0) + return -ETIME; + + /* Record current time in case interrupted, or wedged */ + timeout_remain = nsecs_to_jiffies_timeout(*timeout); + *timeout += ktime_get_raw_ns(); + } + + trace_i915_gem_request_wait_begin(req); + + /* This client is about to stall waiting for the GPU. In many cases + * this is undesirable and limits the throughput of the system, as + * many clients cannot continue processing user input/output whilst + * blocked. RPS autotuning may take tens of milliseconds to respond + * to the GPU load and thus incurs additional latency for the client. + * We can circumvent that by promoting the GPU frequency to maximum + * before we wait. This makes the GPU throttle up much more quickly + * (good for benchmarks and user experience, e.g. window animations), + * but at a cost of spending more power processing the workload + * (bad for battery). Not all clients even want their results + * immediately and for them we should just let the GPU select its own + * frequency to maximise efficiency. To prevent a single client from + * forcing the clocks too high for the whole system, we only allow + * each client to waitboost once in a busy period. + */ + if (INTEL_GEN(req->i915) >= 6) + gen6_rps_boost(req->i915, rps, req->emitted_jiffies); + + /* Optimistic spin for the next ~jiffie before touching IRQs */ + if (i915_spin_request(req, state, 5)) + goto complete; + + set_current_state(state); + add_wait_queue(&req->i915->gpu_error.wait_queue, &reset); + + intel_wait_init(&wait, req->seqno); + if (intel_engine_add_wait(req->engine, &wait)) + /* In order to check that we haven't missed the interrupt + * as we enabled it, we need to kick ourselves to do a + * coherent check on the seqno before we sleep. + */ + goto wakeup; + + for (;;) { + if (signal_pending_state(state, current)) { + ret = -ERESTARTSYS; + break; + } + + timeout_remain = io_schedule_timeout(timeout_remain); + if (timeout_remain == 0) { + ret = -ETIME; + break; + } + + if (intel_wait_complete(&wait)) + break; + + set_current_state(state); + +wakeup: + /* Carefully check if the request is complete, giving time + * for the seqno to be visible following the interrupt. + * We also have to check in case we are kicked by the GPU + * reset in order to drop the struct_mutex. + */ + if (__i915_request_irq_complete(req)) + break; + + /* Only spin if we know the GPU is processing this request */ + if (i915_spin_request(req, state, 2)) + break; + } + remove_wait_queue(&req->i915->gpu_error.wait_queue, &reset); + + intel_engine_remove_wait(req->engine, &wait); + __set_current_state(TASK_RUNNING); +complete: + trace_i915_gem_request_wait_end(req); + + if (timeout) { + *timeout -= ktime_get_raw_ns(); + if (*timeout < 0) + *timeout = 0; + + /* + * Apparently ktime isn't accurate enough and occasionally has a + * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch + * things up to make the test happy. We allow up to 1 jiffy. + * + * This is a regrssion from the timespec->ktime conversion. + */ + if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000) + *timeout = 0; + } + + if (rps && req->seqno == req->engine->last_submitted_seqno) { + /* The GPU is now idle and this client has stalled. + * Since no other client has submitted a request in the + * meantime, assume that this client is the only one + * supplying work to the GPU but is unable to keep that + * work supplied because it is waiting. Since the GPU is + * then never kept fully busy, RPS autoclocking will + * keep the clocks relatively low, causing further delays. + * Compensate by giving the synchronous client credit for + * a waitboost next time. + */ + spin_lock(&req->i915->rps.client_lock); + list_del_init(&rps->link); + spin_unlock(&req->i915->rps.client_lock); + } + + return ret; +} + +/** + * Waits for a request to be signaled, and cleans up the + * request and object lists appropriately for that event. + */ +int i915_wait_request(struct drm_i915_gem_request *req) +{ + int ret; + + GEM_BUG_ON(!req); + lockdep_assert_held(&req->i915->drm.struct_mutex); + + ret = __i915_wait_request(req, req->i915->mm.interruptible, NULL, NULL); + if (ret) + return ret; + + /* If the GPU hung, we want to keep the requests to find the guilty. */ + if (!i915_reset_in_progress(&req->i915->gpu_error)) + i915_gem_request_retire_upto(req); + + return 0; +} + +void i915_gem_request_free(struct kref *req_ref) +{ + struct drm_i915_gem_request *req = + container_of(req_ref, typeof(*req), ref); + kmem_cache_free(req->i915->requests, req); +} diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h new file mode 100644 index 0000000000000000000000000000000000000000..ea700befcc28fdd5552e2a8c9b188222b283b54e --- /dev/null +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -0,0 +1,238 @@ +/* + * Copyright © 2008-2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef I915_GEM_REQUEST_H +#define I915_GEM_REQUEST_H + +/** + * Request queue structure. + * + * The request queue allows us to note sequence numbers that have been emitted + * and may be associated with active buffers to be retired. + * + * By keeping this list, we can avoid having to do questionable sequence + * number comparisons on buffer last_read|write_seqno. It also allows an + * emission time to be associated with the request for tracking how far ahead + * of the GPU the submission is. + * + * The requests are reference counted, so upon creation they should have an + * initial reference taken using kref_init + */ +struct drm_i915_gem_request { + struct kref ref; + + /** On Which ring this request was generated */ + struct drm_i915_private *i915; + + /** + * Context and ring buffer related to this request + * Contexts are refcounted, so when this request is associated with a + * context, we must increment the context's refcount, to guarantee that + * it persists while any request is linked to it. Requests themselves + * are also refcounted, so the request will only be freed when the last + * reference to it is dismissed, and the code in + * i915_gem_request_free() will then decrement the refcount on the + * context. + */ + struct i915_gem_context *ctx; + struct intel_engine_cs *engine; + struct intel_ringbuffer *ringbuf; + struct intel_signal_node signaling; + + /** GEM sequence number associated with the previous request, + * when the HWS breadcrumb is equal to this the GPU is processing + * this request. + */ + u32 previous_seqno; + + /** GEM sequence number associated with this request, + * when the HWS breadcrumb is equal or greater than this the GPU + * has finished processing this request. + */ + u32 seqno; + + /** Position in the ringbuffer of the start of the request */ + u32 head; + + /** + * Position in the ringbuffer of the start of the postfix. + * This is required to calculate the maximum available ringbuffer + * space without overwriting the postfix. + */ + u32 postfix; + + /** Position in the ringbuffer of the end of the whole request */ + u32 tail; + + /** Preallocate space in the ringbuffer for the emitting the request */ + u32 reserved_space; + + /** + * Context related to the previous request. + * As the contexts are accessed by the hardware until the switch is + * completed to a new context, the hardware may still be writing + * to the context object after the breadcrumb is visible. We must + * not unpin/unbind/prune that object whilst still active and so + * we keep the previous context pinned until the following (this) + * request is retired. + */ + struct i915_gem_context *previous_context; + + /** Batch buffer related to this request if any (used for + * error state dump only). + */ + struct drm_i915_gem_object *batch_obj; + + /** Time at which this request was emitted, in jiffies. */ + unsigned long emitted_jiffies; + + /** global list entry for this request */ + struct list_head list; + + struct drm_i915_file_private *file_priv; + /** file_priv list entry for this request */ + struct list_head client_list; + + /** process identifier submitting this request */ + struct pid *pid; + + /** + * The ELSP only accepts two elements at a time, so we queue + * context/tail pairs on a given queue (ring->execlist_queue) until the + * hardware is available. The queue serves a double purpose: we also use + * it to keep track of the up to 2 contexts currently in the hardware + * (usually one in execution and the other queued up by the GPU): We + * only remove elements from the head of the queue when the hardware + * informs us that an element has been completed. + * + * All accesses to the queue are mediated by a spinlock + * (ring->execlist_lock). + */ + + /** Execlist link in the submission queue.*/ + struct list_head execlist_link; + + /** Execlists no. of times this request has been sent to the ELSP */ + int elsp_submitted; + + /** Execlists context hardware id. */ + unsigned int ctx_hw_id; +}; + +struct drm_i915_gem_request * __must_check +i915_gem_request_alloc(struct intel_engine_cs *engine, + struct i915_gem_context *ctx); +void i915_gem_request_free(struct kref *req_ref); +int i915_gem_request_add_to_client(struct drm_i915_gem_request *req, + struct drm_file *file); +void i915_gem_request_retire_upto(struct drm_i915_gem_request *req); + +static inline u32 +i915_gem_request_get_seqno(struct drm_i915_gem_request *req) +{ + return req ? req->seqno : 0; +} + +static inline struct intel_engine_cs * +i915_gem_request_get_engine(struct drm_i915_gem_request *req) +{ + return req ? req->engine : NULL; +} + +static inline struct drm_i915_gem_request * +i915_gem_request_reference(struct drm_i915_gem_request *req) +{ + if (req) + kref_get(&req->ref); + return req; +} + +static inline void +i915_gem_request_unreference(struct drm_i915_gem_request *req) +{ + kref_put(&req->ref, i915_gem_request_free); +} + +static inline void i915_gem_request_assign(struct drm_i915_gem_request **pdst, + struct drm_i915_gem_request *src) +{ + if (src) + i915_gem_request_reference(src); + + if (*pdst) + i915_gem_request_unreference(*pdst); + + *pdst = src; +} + +void __i915_add_request(struct drm_i915_gem_request *req, + struct drm_i915_gem_object *batch_obj, + bool flush_caches); +#define i915_add_request(req) \ + __i915_add_request(req, NULL, true) +#define i915_add_request_no_flush(req) \ + __i915_add_request(req, NULL, false) + +struct intel_rps_client; + +int __i915_wait_request(struct drm_i915_gem_request *req, + bool interruptible, + s64 *timeout, + struct intel_rps_client *rps); +int __must_check i915_wait_request(struct drm_i915_gem_request *req); + +static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine); + +/** + * Returns true if seq1 is later than seq2. + */ +static inline bool i915_seqno_passed(u32 seq1, u32 seq2) +{ + return (s32)(seq1 - seq2) >= 0; +} + +static inline bool +i915_gem_request_started(const struct drm_i915_gem_request *req) +{ + return i915_seqno_passed(intel_engine_get_seqno(req->engine), + req->previous_seqno); +} + +static inline bool +i915_gem_request_completed(const struct drm_i915_gem_request *req) +{ + return i915_seqno_passed(intel_engine_get_seqno(req->engine), + req->seqno); +} + +bool __i915_spin_request(const struct drm_i915_gem_request *request, + int state, unsigned long timeout_us); +static inline bool i915_spin_request(const struct drm_i915_gem_request *request, + int state, unsigned long timeout_us) +{ + return (i915_gem_request_started(request) && + __i915_spin_request(request, state, timeout_us)); +} + +#endif /* I915_GEM_REQUEST_H */