diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c index 7a9f4768591eabe54877327bbf6c1cc220c6d525..a3193321df395ab958bfe087b9c050ea61fd7330 100644 --- a/drivers/gpu/drm/vc4/vc4_crtc.c +++ b/drivers/gpu/drm/vc4/vc4_crtc.c @@ -35,6 +35,7 @@ #include "drm_atomic_helper.h" #include "drm_crtc_helper.h" #include "linux/clk.h" +#include "drm_fb_cma_helper.h" #include "linux/component.h" #include "linux/of_device.h" #include "vc4_drv.h" @@ -475,10 +476,106 @@ static irqreturn_t vc4_crtc_irq_handler(int irq, void *data) return ret; } +struct vc4_async_flip_state { + struct drm_crtc *crtc; + struct drm_framebuffer *fb; + struct drm_pending_vblank_event *event; + + struct vc4_seqno_cb cb; +}; + +/* Called when the V3D execution for the BO being flipped to is done, so that + * we can actually update the plane's address to point to it. + */ +static void +vc4_async_page_flip_complete(struct vc4_seqno_cb *cb) +{ + struct vc4_async_flip_state *flip_state = + container_of(cb, struct vc4_async_flip_state, cb); + struct drm_crtc *crtc = flip_state->crtc; + struct drm_device *dev = crtc->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct drm_plane *plane = crtc->primary; + + vc4_plane_async_set_fb(plane, flip_state->fb); + if (flip_state->event) { + unsigned long flags; + + spin_lock_irqsave(&dev->event_lock, flags); + drm_crtc_send_vblank_event(crtc, flip_state->event); + spin_unlock_irqrestore(&dev->event_lock, flags); + } + + drm_framebuffer_unreference(flip_state->fb); + kfree(flip_state); + + up(&vc4->async_modeset); +} + +/* Implements async (non-vblank-synced) page flips. + * + * The page flip ioctl needs to return immediately, so we grab the + * modeset semaphore on the pipe, and queue the address update for + * when V3D is done with the BO being flipped to. + */ +static int vc4_async_page_flip(struct drm_crtc *crtc, + struct drm_framebuffer *fb, + struct drm_pending_vblank_event *event, + uint32_t flags) +{ + struct drm_device *dev = crtc->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + struct drm_plane *plane = crtc->primary; + int ret = 0; + struct vc4_async_flip_state *flip_state; + struct drm_gem_cma_object *cma_bo = drm_fb_cma_get_gem_obj(fb, 0); + struct vc4_bo *bo = to_vc4_bo(&cma_bo->base); + + flip_state = kzalloc(sizeof(*flip_state), GFP_KERNEL); + if (!flip_state) + return -ENOMEM; + + drm_framebuffer_reference(fb); + flip_state->fb = fb; + flip_state->crtc = crtc; + flip_state->event = event; + + /* Make sure all other async modesetes have landed. */ + ret = down_interruptible(&vc4->async_modeset); + if (ret) { + kfree(flip_state); + return ret; + } + + /* Immediately update the plane's legacy fb pointer, so that later + * modeset prep sees the state that will be present when the semaphore + * is released. + */ + drm_atomic_set_fb_for_plane(plane->state, fb); + plane->fb = fb; + + vc4_queue_seqno_cb(dev, &flip_state->cb, bo->seqno, + vc4_async_page_flip_complete); + + /* Driver takes ownership of state on successful async commit. */ + return 0; +} + +static int vc4_page_flip(struct drm_crtc *crtc, + struct drm_framebuffer *fb, + struct drm_pending_vblank_event *event, + uint32_t flags) +{ + if (flags & DRM_MODE_PAGE_FLIP_ASYNC) + return vc4_async_page_flip(crtc, fb, event, flags); + else + return drm_atomic_helper_page_flip(crtc, fb, event, flags); +} + static const struct drm_crtc_funcs vc4_crtc_funcs = { .set_config = drm_atomic_helper_set_config, .destroy = vc4_crtc_destroy, - .page_flip = drm_atomic_helper_page_flip, + .page_flip = vc4_page_flip, .set_property = NULL, .cursor_set = NULL, /* handled by drm_mode_cursor_universal */ .cursor_move = NULL, /* handled by drm_mode_cursor_universal */ diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index 0bc8c57196acae69984ed09e716f8d24b03fef23..f9927d87369f8338fd4f9d5587747cc15d0a6153 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -76,6 +76,11 @@ struct vc4_dev { wait_queue_head_t job_wait_queue; struct work_struct job_done_work; + /* List of struct vc4_seqno_cb for callbacks to be made from a + * workqueue when the given seqno is passed. + */ + struct list_head seqno_cb_list; + /* The binner overflow memory that's currently set up in * BPOA/BPOS registers. When overflow occurs and a new one is * allocated, the previous one will be moved to @@ -128,6 +133,12 @@ to_vc4_bo(struct drm_gem_object *bo) return (struct vc4_bo *)bo; } +struct vc4_seqno_cb { + struct work_struct work; + uint64_t seqno; + void (*func)(struct vc4_seqno_cb *cb); +}; + struct vc4_v3d { struct platform_device *pdev; void __iomem *regs; @@ -384,6 +395,9 @@ void vc4_submit_next_job(struct drm_device *dev); int vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns, bool interruptible); void vc4_job_handle_completed(struct vc4_dev *vc4); +int vc4_queue_seqno_cb(struct drm_device *dev, + struct vc4_seqno_cb *cb, uint64_t seqno, + void (*func)(struct vc4_seqno_cb *cb)); /* vc4_hdmi.c */ extern struct platform_driver vc4_hdmi_driver; @@ -409,6 +423,8 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev, enum drm_plane_type type); u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist); u32 vc4_plane_dlist_size(struct drm_plane_state *state); +void vc4_plane_async_set_fb(struct drm_plane *plane, + struct drm_framebuffer *fb); /* vc4_v3d.c */ extern struct platform_driver vc4_v3d_driver; diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index 936dddfa890f29e2d866e520ab969b357ec82b1e..5fb0556e001e4b1b83226157902d38569aa9fc4f 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -461,6 +461,7 @@ void vc4_job_handle_completed(struct vc4_dev *vc4) { unsigned long irqflags; + struct vc4_seqno_cb *cb, *cb_temp; spin_lock_irqsave(&vc4->job_lock, irqflags); while (!list_empty(&vc4->job_done_list)) { @@ -473,7 +474,45 @@ vc4_job_handle_completed(struct vc4_dev *vc4) vc4_complete_exec(vc4->dev, exec); spin_lock_irqsave(&vc4->job_lock, irqflags); } + + list_for_each_entry_safe(cb, cb_temp, &vc4->seqno_cb_list, work.entry) { + if (cb->seqno <= vc4->finished_seqno) { + list_del_init(&cb->work.entry); + schedule_work(&cb->work); + } + } + + spin_unlock_irqrestore(&vc4->job_lock, irqflags); +} + +static void vc4_seqno_cb_work(struct work_struct *work) +{ + struct vc4_seqno_cb *cb = container_of(work, struct vc4_seqno_cb, work); + + cb->func(cb); +} + +int vc4_queue_seqno_cb(struct drm_device *dev, + struct vc4_seqno_cb *cb, uint64_t seqno, + void (*func)(struct vc4_seqno_cb *cb)) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + int ret = 0; + unsigned long irqflags; + + cb->func = func; + INIT_WORK(&cb->work, vc4_seqno_cb_work); + + spin_lock_irqsave(&vc4->job_lock, irqflags); + if (seqno > vc4->finished_seqno) { + cb->seqno = seqno; + list_add_tail(&cb->work.entry, &vc4->seqno_cb_list); + } else { + schedule_work(&cb->work); + } spin_unlock_irqrestore(&vc4->job_lock, irqflags); + + return ret; } /* Scheduled when any job has been completed, this walks the list of @@ -610,6 +649,7 @@ vc4_gem_init(struct drm_device *dev) INIT_LIST_HEAD(&vc4->job_list); INIT_LIST_HEAD(&vc4->job_done_list); + INIT_LIST_HEAD(&vc4->seqno_cb_list); spin_lock_init(&vc4->job_lock); INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work); diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c index 2e5597d10cc629e08ccde726a64d6e36da69a48f..f95f2df5f8d1a52031a35dd26b03331c513fe13e 100644 --- a/drivers/gpu/drm/vc4/vc4_kms.c +++ b/drivers/gpu/drm/vc4/vc4_kms.c @@ -15,6 +15,7 @@ */ #include "drm_crtc.h" +#include "drm_atomic.h" #include "drm_atomic_helper.h" #include "drm_crtc_helper.h" #include "drm_plane_helper.h" @@ -29,10 +30,152 @@ static void vc4_output_poll_changed(struct drm_device *dev) drm_fbdev_cma_hotplug_event(vc4->fbdev); } +struct vc4_commit { + struct drm_device *dev; + struct drm_atomic_state *state; + struct vc4_seqno_cb cb; +}; + +static void +vc4_atomic_complete_commit(struct vc4_commit *c) +{ + struct drm_atomic_state *state = c->state; + struct drm_device *dev = state->dev; + struct vc4_dev *vc4 = to_vc4_dev(dev); + + drm_atomic_helper_commit_modeset_disables(dev, state); + + drm_atomic_helper_commit_planes(dev, state, false); + + drm_atomic_helper_commit_modeset_enables(dev, state); + + drm_atomic_helper_wait_for_vblanks(dev, state); + + drm_atomic_helper_cleanup_planes(dev, state); + + drm_atomic_state_free(state); + + up(&vc4->async_modeset); + + kfree(c); +} + +static void +vc4_atomic_complete_commit_seqno_cb(struct vc4_seqno_cb *cb) +{ + struct vc4_commit *c = container_of(cb, struct vc4_commit, cb); + + vc4_atomic_complete_commit(c); +} + +static struct vc4_commit *commit_init(struct drm_atomic_state *state) +{ + struct vc4_commit *c = kzalloc(sizeof(*c), GFP_KERNEL); + + if (!c) + return NULL; + c->dev = state->dev; + c->state = state; + + return c; +} + +/** + * vc4_atomic_commit - commit validated state object + * @dev: DRM device + * @state: the driver state object + * @async: asynchronous commit + * + * This function commits a with drm_atomic_helper_check() pre-validated state + * object. This can still fail when e.g. the framebuffer reservation fails. For + * now this doesn't implement asynchronous commits. + * + * RETURNS + * Zero for success or -errno. + */ +static int vc4_atomic_commit(struct drm_device *dev, + struct drm_atomic_state *state, + bool async) +{ + struct vc4_dev *vc4 = to_vc4_dev(dev); + int ret; + int i; + uint64_t wait_seqno = 0; + struct vc4_commit *c; + + c = commit_init(state); + if (!c) + return -ENOMEM; + + /* Make sure that any outstanding modesets have finished. */ + ret = down_interruptible(&vc4->async_modeset); + if (ret) { + kfree(c); + return ret; + } + + ret = drm_atomic_helper_prepare_planes(dev, state); + if (ret) { + kfree(c); + up(&vc4->async_modeset); + return ret; + } + + for (i = 0; i < dev->mode_config.num_total_plane; i++) { + struct drm_plane *plane = state->planes[i]; + struct drm_plane_state *new_state = state->plane_states[i]; + + if (!plane) + continue; + + if ((plane->state->fb != new_state->fb) && new_state->fb) { + struct drm_gem_cma_object *cma_bo = + drm_fb_cma_get_gem_obj(new_state->fb, 0); + struct vc4_bo *bo = to_vc4_bo(&cma_bo->base); + + wait_seqno = max(bo->seqno, wait_seqno); + } + } + + /* + * This is the point of no return - everything below never fails except + * when the hw goes bonghits. Which means we can commit the new state on + * the software side now. + */ + + drm_atomic_helper_swap_state(dev, state); + + /* + * Everything below can be run asynchronously without the need to grab + * any modeset locks at all under one condition: It must be guaranteed + * that the asynchronous work has either been cancelled (if the driver + * supports it, which at least requires that the framebuffers get + * cleaned up with drm_atomic_helper_cleanup_planes()) or completed + * before the new state gets committed on the software side with + * drm_atomic_helper_swap_state(). + * + * This scheme allows new atomic state updates to be prepared and + * checked in parallel to the asynchronous completion of the previous + * update. Which is important since compositors need to figure out the + * composition of the next frame right after having submitted the + * current layout. + */ + + if (async) { + vc4_queue_seqno_cb(dev, &c->cb, wait_seqno, + vc4_atomic_complete_commit_seqno_cb); + } else { + vc4_wait_for_seqno(dev, wait_seqno, ~0ull, false); + vc4_atomic_complete_commit(c); + } + + return 0; +} + static const struct drm_mode_config_funcs vc4_mode_funcs = { .output_poll_changed = vc4_output_poll_changed, .atomic_check = drm_atomic_helper_check, - .atomic_commit = drm_atomic_helper_commit, + .atomic_commit = vc4_atomic_commit, .fb_create = drm_fb_cma_create, }; @@ -41,6 +184,8 @@ int vc4_kms_load(struct drm_device *dev) struct vc4_dev *vc4 = to_vc4_dev(dev); int ret; + sema_init(&vc4->async_modeset, 1); + ret = drm_vblank_init(dev, dev->mode_config.num_crtc); if (ret < 0) { dev_err(dev->dev, "failed to initialize vblank\n"); @@ -51,6 +196,8 @@ int vc4_kms_load(struct drm_device *dev) dev->mode_config.max_height = 2048; dev->mode_config.funcs = &vc4_mode_funcs; dev->mode_config.preferred_depth = 24; + dev->mode_config.async_page_flip = true; + dev->vblank_disable_allowed = true; drm_mode_config_reset(dev); diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index cdd8b10c014742cb8079837e2723163afb8d04c1..db32c37335b1ba46b22f04dcc40e0eb53fd7669b 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -29,6 +29,14 @@ struct vc4_plane_state { u32 *dlist; u32 dlist_size; /* Number of dwords in allocated for the display list */ u32 dlist_count; /* Number of used dwords in the display list. */ + + /* Offset in the dlist to pointer word 0. */ + u32 pw0_offset; + + /* Offset where the plane's dlist was last stored in the + hardware at vc4_crtc_atomic_flush() time. + */ + u32 *hw_dlist; }; static inline struct vc4_plane_state * @@ -197,6 +205,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane, /* Position Word 3: Context. Written by the HVS. */ vc4_dlist_write(vc4_state, 0xc0c0c0c0); + vc4_state->pw0_offset = vc4_state->dlist_count; + /* Pointer Word 0: RGB / Y Pointer */ vc4_dlist_write(vc4_state, bo->paddr + offset); @@ -248,6 +258,8 @@ u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist) struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state); int i; + vc4_state->hw_dlist = dlist; + /* Can't memcpy_toio() because it needs to be 32-bit writes. */ for (i = 0; i < vc4_state->dlist_count; i++) writel(vc4_state->dlist[i], &dlist[i]); @@ -262,6 +274,34 @@ u32 vc4_plane_dlist_size(struct drm_plane_state *state) return vc4_state->dlist_count; } +/* Updates the plane to immediately (well, once the FIFO needs + * refilling) scan out from at a new framebuffer. + */ +void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb) +{ + struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state); + struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0); + uint32_t addr; + + /* We're skipping the address adjustment for negative origin, + * because this is only called on the primary plane. + */ + WARN_ON_ONCE(plane->state->crtc_x < 0 || plane->state->crtc_y < 0); + addr = bo->paddr + fb->offsets[0]; + + /* Write the new address into the hardware immediately. The + * scanout will start from this address as soon as the FIFO + * needs to refill with pixels. + */ + writel(addr, &vc4_state->hw_dlist[vc4_state->pw0_offset]); + + /* Also update the CPU-side dlist copy, so that any later + * atomic updates that don't do a new modeset on our plane + * also use our updated address. + */ + vc4_state->dlist[vc4_state->pw0_offset] = addr; +} + static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = { .prepare_fb = NULL, .cleanup_fb = NULL,