From 1758b90e38f53b93821c908201826e825a37cb65 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 30 Jun 2016 15:32:44 +0100 Subject: [PATCH] drm/i915: Use a hybrid scheme for fast register waits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ville Syrjälä reported that in the majority of wait_for(I915_READ()) he inspect, most completed within the first couple of reads and that the delay between those wait_for() reads was the ratelimiting step for many code paths. For example, __gen6_update_ring_freq() was blamed for slowing down boot by many milliseconds, but under Ville's scrutiny the issue was just excessive delay waiting for sandybridge_pcode_write(). We can eliminate the wait by initially using a busyspin upon the register read and only fallback to the sleeping loop in cases where the hardware is indeed too slow. A threshold of 2 microseconds is used as the initial ballpark. To avoid excessive code bloating from converting every wait_for() into a hybrid busy/sleep loop, we extend wait_for_register_fw() and export it for use by other callers. Signed-off-by: Chris Wilson Cc: Mika Kuoppala Cc: Ville Syrjälä Cc: Tvrtko Ursulin Link: http://patchwork.freedesktop.org/patch/msgid/1467297225-21379-1-git-send-email-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_drv.h | 11 ++++ drivers/gpu/drm/i915/intel_uncore.c | 83 +++++++++++++++++++++++++---- 2 files changed, 83 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 82b20e14b065..48d30676455e 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2968,6 +2968,17 @@ u64 intel_uncore_edram_size(struct drm_i915_private *dev_priv); void assert_forcewakes_inactive(struct drm_i915_private *dev_priv); +int intel_wait_for_register(struct drm_i915_private *dev_priv, + i915_reg_t reg, + const u32 mask, + const u32 value, + const unsigned long timeout_ms); +int intel_wait_for_register_fw(struct drm_i915_private *dev_priv, + i915_reg_t reg, + const u32 mask, + const u32 value, + const unsigned long timeout_ms); + static inline bool intel_gvt_active(struct drm_i915_private *dev_priv) { return dev_priv->gvt.initialized; diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c index c1ca458d688e..4c166f6550be 100644 --- a/drivers/gpu/drm/i915/intel_uncore.c +++ b/drivers/gpu/drm/i915/intel_uncore.c @@ -1609,13 +1609,74 @@ static int gen6_reset_engines(struct drm_i915_private *dev_priv, return ret; } -static int wait_for_register_fw(struct drm_i915_private *dev_priv, - i915_reg_t reg, - const u32 mask, - const u32 value, - const unsigned long timeout_ms) +/** + * intel_wait_for_register_fw - wait until register matches expected state + * @dev_priv: the i915 device + * @reg: the register to read + * @mask: mask to apply to register value + * @value: expected value + * @timeout_ms: timeout in millisecond + * + * This routine waits until the target register @reg contains the expected + * @value after applying the @mask, i.e. it waits until + * (I915_READ_FW(@reg) & @mask) == @value + * Otherwise, the wait will timeout after @timeout_ms milliseconds. + * + * Note that this routine assumes the caller holds forcewake asserted, it is + * not suitable for very long waits. See intel_wait_for_register() if you + * wish to wait without holding forcewake for the duration (i.e. you expect + * the wait to be slow). + * + * Returns 0 if the register matches the desired condition, or -ETIMEOUT. + */ +int intel_wait_for_register_fw(struct drm_i915_private *dev_priv, + i915_reg_t reg, + const u32 mask, + const u32 value, + const unsigned long timeout_ms) +{ +#define done ((I915_READ_FW(reg) & mask) == value) + int ret = wait_for_us(done, 2); + if (ret) + ret = wait_for(done, timeout_ms); + return ret; +#undef done +} + +/** + * intel_wait_for_register - wait until register matches expected state + * @dev_priv: the i915 device + * @reg: the register to read + * @mask: mask to apply to register value + * @value: expected value + * @timeout_ms: timeout in millisecond + * + * This routine waits until the target register @reg contains the expected + * @value after applying the @mask, i.e. it waits until + * (I915_READ(@reg) & @mask) == @value + * Otherwise, the wait will timeout after @timeout_ms milliseconds. + * + * Returns 0 if the register matches the desired condition, or -ETIMEOUT. + */ +int intel_wait_for_register(struct drm_i915_private *dev_priv, + i915_reg_t reg, + const u32 mask, + const u32 value, + const unsigned long timeout_ms) { - return wait_for((I915_READ_FW(reg) & mask) == value, timeout_ms); + + unsigned fw = + intel_uncore_forcewake_for_reg(dev_priv, reg, FW_REG_READ); + int ret; + + intel_uncore_forcewake_get(dev_priv, fw); + ret = wait_for_us((I915_READ_FW(reg) & mask) == value, 2); + intel_uncore_forcewake_put(dev_priv, fw); + if (ret) + ret = wait_for((I915_READ_NOTRACE(reg) & mask) == value, + timeout_ms); + + return ret; } static int gen8_request_engine_reset(struct intel_engine_cs *engine) @@ -1626,11 +1687,11 @@ static int gen8_request_engine_reset(struct intel_engine_cs *engine) I915_WRITE_FW(RING_RESET_CTL(engine->mmio_base), _MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET)); - ret = wait_for_register_fw(dev_priv, - RING_RESET_CTL(engine->mmio_base), - RESET_CTL_READY_TO_RESET, - RESET_CTL_READY_TO_RESET, - 700); + ret = intel_wait_for_register_fw(dev_priv, + RING_RESET_CTL(engine->mmio_base), + RESET_CTL_READY_TO_RESET, + RESET_CTL_READY_TO_RESET, + 700); if (ret) DRM_ERROR("%s: reset request timeout\n", engine->name); -- GitLab