From 6b95a207c1fd552e7d017837c5eaf1b0173a48c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kristian=20H=C3=B8gsberg?= Date: Wed, 18 Nov 2009 11:25:18 -0500 Subject: [PATCH] drm/i915: Add intel implementation of the pageflip ioctl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Acked-by: Jakob Bornecrantz Acked-by: Thomas Hellström Review-by: Chris Wilson Signed-off-by: Jesse "Orange Smoothie" Barnes Signed-off-by: Kristian Høgsberg Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_drv.h | 12 ++ drivers/gpu/drm/i915/i915_gem.c | 64 +++++++- drivers/gpu/drm/i915/i915_irq.c | 19 ++- drivers/gpu/drm/i915/i915_reg.h | 2 + drivers/gpu/drm/i915/intel_display.c | 228 +++++++++++++++++++++++---- drivers/gpu/drm/i915/intel_drv.h | 4 + 6 files changed, 291 insertions(+), 38 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 450dcf0d25c8..ca1ba42af566 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -536,6 +536,10 @@ typedef struct drm_i915_private { /* indicate whether the LVDS_BORDER should be enabled or not */ unsigned int lvds_border_bits; + struct drm_crtc *plane_to_crtc_mapping[2]; + struct drm_crtc *pipe_to_crtc_mapping[2]; + wait_queue_head_t pending_flip_queue; + /* Reclocking support */ bool render_reclock_avail; bool lvds_downclock_avail; @@ -639,6 +643,13 @@ struct drm_i915_gem_object { * Advice: are the backing pages purgeable? */ int madv; + + /** + * Number of crtcs where this object is currently the fb, but + * will be page flipped away on the next vblank. When it + * reaches 0, dev_priv->pending_flip_queue will be woken up. + */ + atomic_t pending_flip; }; /** @@ -830,6 +841,7 @@ void i915_gem_free_all_phys_object(struct drm_device *dev); int i915_gem_object_get_pages(struct drm_gem_object *obj); void i915_gem_object_put_pages(struct drm_gem_object *obj); void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv); +void i915_gem_object_flush_write_domain(struct drm_gem_object *obj); void i915_gem_shrinker_init(void); void i915_gem_shrinker_exit(void); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 481c0ab888c8..214fb1864710 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2771,6 +2771,22 @@ i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj) old_write_domain); } +void +i915_gem_object_flush_write_domain(struct drm_gem_object *obj) +{ + switch (obj->write_domain) { + case I915_GEM_DOMAIN_GTT: + i915_gem_object_flush_gtt_write_domain(obj); + break; + case I915_GEM_DOMAIN_CPU: + i915_gem_object_flush_cpu_write_domain(obj); + break; + default: + i915_gem_object_flush_gpu_write_domain(obj); + break; + } +} + /** * Moves a single object to the GTT read, and possibly write domain. * @@ -3536,6 +3552,41 @@ i915_gem_check_execbuffer (struct drm_i915_gem_execbuffer *exec, return 0; } +static int +i915_gem_wait_for_pending_flip(struct drm_device *dev, + struct drm_gem_object **object_list, + int count) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_gem_object *obj_priv; + DEFINE_WAIT(wait); + int i, ret = 0; + + for (;;) { + prepare_to_wait(&dev_priv->pending_flip_queue, + &wait, TASK_INTERRUPTIBLE); + for (i = 0; i < count; i++) { + obj_priv = object_list[i]->driver_private; + if (atomic_read(&obj_priv->pending_flip) > 0) + break; + } + if (i == count) + break; + + if (!signal_pending(current)) { + mutex_unlock(&dev->struct_mutex); + schedule(); + mutex_lock(&dev->struct_mutex); + continue; + } + ret = -ERESTARTSYS; + break; + } + finish_wait(&dev_priv->pending_flip_queue, &wait); + + return ret; +} + int i915_gem_execbuffer(struct drm_device *dev, void *data, struct drm_file *file_priv) @@ -3551,7 +3602,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, int ret, ret2, i, pinned = 0; uint64_t exec_offset; uint32_t seqno, flush_domains, reloc_index; - int pin_tries; + int pin_tries, flips; #if WATCH_EXEC DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n", @@ -3623,6 +3674,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, } /* Look up object handles */ + flips = 0; for (i = 0; i < args->buffer_count; i++) { object_list[i] = drm_gem_object_lookup(dev, file_priv, exec_list[i].handle); @@ -3641,6 +3693,14 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, goto err; } obj_priv->in_execbuffer = true; + flips += atomic_read(&obj_priv->pending_flip); + } + + if (flips > 0) { + ret = i915_gem_wait_for_pending_flip(dev, object_list, + args->buffer_count); + if (ret) + goto err; } /* Pin and relocate */ @@ -4625,8 +4685,8 @@ i915_gem_load(struct drm_device *dev) for (i = 0; i < 8; i++) I915_WRITE(FENCE_REG_945_8 + (i * 4), 0); } - i915_gem_detect_bit_6_swizzle(dev); + init_waitqueue_head(&dev_priv->pending_flip_queue); } /* diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 77bc1d28f744..e2d01b3fa171 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -43,10 +43,13 @@ * we leave them always unmasked in IMR and then control enabling them through * PIPESTAT alone. */ -#define I915_INTERRUPT_ENABLE_FIX (I915_ASLE_INTERRUPT | \ - I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | \ - I915_DISPLAY_PIPE_B_EVENT_INTERRUPT | \ - I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT) +#define I915_INTERRUPT_ENABLE_FIX \ + (I915_ASLE_INTERRUPT | \ + I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | \ + I915_DISPLAY_PIPE_B_EVENT_INTERRUPT | \ + I915_DISPLAY_PLANE_A_FLIP_PENDING_INTERRUPT | \ + I915_DISPLAY_PLANE_B_FLIP_PENDING_INTERRUPT | \ + I915_RENDER_COMMAND_PARSER_ERROR_INTERRUPT) /** Interrupts that we mask and unmask at runtime. */ #define I915_INTERRUPT_ENABLE_VAR (I915_USER_INTERRUPT) @@ -643,14 +646,22 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS) mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD); } + if (iir & I915_DISPLAY_PLANE_A_FLIP_PENDING_INTERRUPT) + intel_prepare_page_flip(dev, 0); + + if (iir & I915_DISPLAY_PLANE_B_FLIP_PENDING_INTERRUPT) + intel_prepare_page_flip(dev, 1); + if (pipea_stats & vblank_status) { vblank++; drm_handle_vblank(dev, 0); + intel_finish_page_flip(dev, 0); } if (pipeb_stats & vblank_status) { vblank++; drm_handle_vblank(dev, 1); + intel_finish_page_flip(dev, 1); } if ((pipeb_stats & I915_LEGACY_BLC_EVENT_STATUS) || diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index d58f7ad91161..120c77dabcff 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -157,6 +157,8 @@ #define MI_OVERLAY_ON (0x1<<21) #define MI_OVERLAY_OFF (0x2<<21) #define MI_LOAD_SCAN_LINES_INCL MI_INSTR(0x12, 0) +#define MI_DISPLAY_FLIP MI_INSTR(0x14, 2) +#define MI_DISPLAY_FLIP_PLANE(n) ((n) << 20) #define MI_STORE_DWORD_IMM MI_INSTR(0x20, 1) #define MI_MEM_VIRTUAL (1 << 22) /* 965+ only */ #define MI_STORE_DWORD_INDEX MI_INSTR(0x21, 1) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 002c07daf9b8..b63a25f0f86d 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -1190,6 +1190,51 @@ static void intel_update_fbc(struct drm_crtc *crtc, dev_priv->display.disable_fbc(dev); } +static int +intel_pin_and_fence_fb_obj(struct drm_device *dev, struct drm_gem_object *obj) +{ + struct drm_i915_gem_object *obj_priv = obj->driver_private; + u32 alignment; + int ret; + + switch (obj_priv->tiling_mode) { + case I915_TILING_NONE: + alignment = 64 * 1024; + break; + case I915_TILING_X: + /* pin() will align the object as required by fence */ + alignment = 0; + break; + case I915_TILING_Y: + /* FIXME: Is this true? */ + DRM_ERROR("Y tiled not allowed for scan out buffers\n"); + return -EINVAL; + default: + BUG(); + } + + alignment = 256 * 1024; + ret = i915_gem_object_pin(obj, alignment); + if (ret != 0) + return ret; + + /* Install a fence for tiled scan-out. Pre-i965 always needs a + * fence, whereas 965+ only requires a fence if using + * framebuffer compression. For simplicity, we always install + * a fence as the cost is not that onerous. + */ + if (obj_priv->fence_reg == I915_FENCE_REG_NONE && + obj_priv->tiling_mode != I915_TILING_NONE) { + ret = i915_gem_object_get_fence_reg(obj); + if (ret != 0) { + i915_gem_object_unpin(obj); + return ret; + } + } + + return 0; +} + static int intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, struct drm_framebuffer *old_fb) @@ -1209,7 +1254,7 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, int dspstride = (plane == 0) ? DSPASTRIDE : DSPBSTRIDE; int dsptileoff = (plane == 0 ? DSPATILEOFF : DSPBTILEOFF); int dspcntr_reg = (plane == 0) ? DSPACNTR : DSPBCNTR; - u32 dspcntr, alignment; + u32 dspcntr; int ret; /* no fb bound */ @@ -1231,24 +1276,8 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, obj = intel_fb->obj; obj_priv = obj->driver_private; - switch (obj_priv->tiling_mode) { - case I915_TILING_NONE: - alignment = 64 * 1024; - break; - case I915_TILING_X: - /* pin() will align the object as required by fence */ - alignment = 0; - break; - case I915_TILING_Y: - /* FIXME: Is this true? */ - DRM_ERROR("Y tiled not allowed for scan out buffers\n"); - return -EINVAL; - default: - BUG(); - } - mutex_lock(&dev->struct_mutex); - ret = i915_gem_object_pin(obj, alignment); + ret = intel_pin_and_fence_fb_obj(dev, obj); if (ret != 0) { mutex_unlock(&dev->struct_mutex); return ret; @@ -1261,20 +1290,6 @@ intel_pipe_set_base(struct drm_crtc *crtc, int x, int y, return ret; } - /* Install a fence for tiled scan-out. Pre-i965 always needs a fence, - * whereas 965+ only requires a fence if using framebuffer compression. - * For simplicity, we always install a fence as the cost is not that onerous. - */ - if (obj_priv->fence_reg == I915_FENCE_REG_NONE && - obj_priv->tiling_mode != I915_TILING_NONE) { - ret = i915_gem_object_get_fence_reg(obj); - if (ret != 0) { - i915_gem_object_unpin(obj); - mutex_unlock(&dev->struct_mutex); - return ret; - } - } - dspcntr = I915_READ(dspcntr_reg); /* Mask out pixel format bits in case we change it */ dspcntr &= ~DISPPLANE_PIXFORMAT_MASK; @@ -4068,6 +4083,153 @@ static void intel_crtc_destroy(struct drm_crtc *crtc) kfree(intel_crtc); } +struct intel_unpin_work { + struct work_struct work; + struct drm_device *dev; + struct drm_gem_object *obj; + struct drm_pending_vblank_event *event; + int pending; +}; + +static void intel_unpin_work_fn(struct work_struct *__work) +{ + struct intel_unpin_work *work = + container_of(__work, struct intel_unpin_work, work); + + mutex_lock(&work->dev->struct_mutex); + i915_gem_object_unpin(work->obj); + drm_gem_object_unreference(work->obj); + mutex_unlock(&work->dev->struct_mutex); + kfree(work); +} + +void intel_finish_page_flip(struct drm_device *dev, int pipe) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_crtc *crtc = dev_priv->pipe_to_crtc_mapping[pipe]; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct intel_unpin_work *work; + struct drm_i915_gem_object *obj_priv; + struct drm_pending_vblank_event *e; + struct timeval now; + unsigned long flags; + + /* Ignore early vblank irqs */ + if (intel_crtc == NULL) + return; + + spin_lock_irqsave(&dev->event_lock, flags); + work = intel_crtc->unpin_work; + if (work == NULL || !work->pending) { + spin_unlock_irqrestore(&dev->event_lock, flags); + return; + } + + intel_crtc->unpin_work = NULL; + drm_vblank_put(dev, intel_crtc->pipe); + + if (work->event) { + e = work->event; + do_gettimeofday(&now); + e->event.sequence = drm_vblank_count(dev, intel_crtc->pipe); + e->event.tv_sec = now.tv_sec; + e->event.tv_usec = now.tv_usec; + list_add_tail(&e->base.link, + &e->base.file_priv->event_list); + wake_up_interruptible(&e->base.file_priv->event_wait); + } + + spin_unlock_irqrestore(&dev->event_lock, flags); + + obj_priv = work->obj->driver_private; + if (atomic_dec_and_test(&obj_priv->pending_flip)) + DRM_WAKEUP(&dev_priv->pending_flip_queue); + schedule_work(&work->work); +} + +void intel_prepare_page_flip(struct drm_device *dev, int plane) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + struct intel_crtc *intel_crtc = + to_intel_crtc(dev_priv->plane_to_crtc_mapping[plane]); + unsigned long flags; + + spin_lock_irqsave(&dev->event_lock, flags); + if (intel_crtc->unpin_work) + intel_crtc->unpin_work->pending = 1; + spin_unlock_irqrestore(&dev->event_lock, flags); +} + +static int intel_crtc_page_flip(struct drm_crtc *crtc, + struct drm_framebuffer *fb, + struct drm_pending_vblank_event *event) +{ + struct drm_device *dev = crtc->dev; + struct drm_i915_private *dev_priv = dev->dev_private; + struct intel_framebuffer *intel_fb; + struct drm_i915_gem_object *obj_priv; + struct drm_gem_object *obj; + struct intel_crtc *intel_crtc = to_intel_crtc(crtc); + struct intel_unpin_work *work; + unsigned long flags; + int ret; + RING_LOCALS; + + work = kzalloc(sizeof *work, GFP_KERNEL); + if (work == NULL) + return -ENOMEM; + + mutex_lock(&dev->struct_mutex); + + work->event = event; + work->dev = crtc->dev; + intel_fb = to_intel_framebuffer(crtc->fb); + work->obj = intel_fb->obj; + INIT_WORK(&work->work, intel_unpin_work_fn); + + /* We borrow the event spin lock for protecting unpin_work */ + spin_lock_irqsave(&dev->event_lock, flags); + if (intel_crtc->unpin_work) { + spin_unlock_irqrestore(&dev->event_lock, flags); + kfree(work); + mutex_unlock(&dev->struct_mutex); + return -EBUSY; + } + intel_crtc->unpin_work = work; + spin_unlock_irqrestore(&dev->event_lock, flags); + + intel_fb = to_intel_framebuffer(fb); + obj = intel_fb->obj; + + ret = intel_pin_and_fence_fb_obj(dev, obj); + if (ret != 0) { + kfree(work); + mutex_unlock(&dev->struct_mutex); + return ret; + } + + /* Reference the old fb object for the scheduled work. */ + drm_gem_object_reference(work->obj); + + crtc->fb = fb; + i915_gem_object_flush_write_domain(obj); + drm_vblank_get(dev, intel_crtc->pipe); + obj_priv = obj->driver_private; + atomic_inc(&obj_priv->pending_flip); + + BEGIN_LP_RING(4); + OUT_RING(MI_DISPLAY_FLIP | + MI_DISPLAY_FLIP_PLANE(intel_crtc->plane)); + OUT_RING(fb->pitch); + OUT_RING(obj_priv->gtt_offset | obj_priv->tiling_mode); + OUT_RING((fb->width << 16) | fb->height); + ADVANCE_LP_RING(); + + mutex_unlock(&dev->struct_mutex); + + return 0; +} + static const struct drm_crtc_helper_funcs intel_helper_funcs = { .dpms = intel_crtc_dpms, .mode_fixup = intel_crtc_mode_fixup, @@ -4084,12 +4246,14 @@ static const struct drm_crtc_funcs intel_crtc_funcs = { .gamma_set = intel_crtc_gamma_set, .set_config = drm_crtc_helper_set_config, .destroy = intel_crtc_destroy, + .page_flip = intel_crtc_page_flip, }; static void intel_crtc_init(struct drm_device *dev, int pipe) { struct intel_crtc *intel_crtc; + struct drm_i915_private *dev_priv = dev->dev_private; int i; intel_crtc = kzalloc(sizeof(struct intel_crtc) + (INTELFB_CONN_LIMIT * sizeof(struct drm_connector *)), GFP_KERNEL); diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index 497240581c6a..8a22f2508899 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -148,6 +148,7 @@ struct intel_crtc { struct timer_list idle_timer; bool lowfreq_avail; struct intel_overlay *overlay; + struct intel_unpin_work *unpin_work; }; #define to_intel_crtc(x) container_of(x, struct intel_crtc, base) @@ -211,6 +212,9 @@ extern int intel_framebuffer_create(struct drm_device *dev, struct drm_framebuffer **fb, struct drm_gem_object *obj); +extern void intel_prepare_page_flip(struct drm_device *dev, int plane); +extern void intel_finish_page_flip(struct drm_device *dev, int pipe); + extern void intel_setup_overlay(struct drm_device *dev); extern void intel_cleanup_overlay(struct drm_device *dev); extern int intel_overlay_switch_off(struct intel_overlay *overlay); -- GitLab