diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index 74c6b42d259788fab5f35fd3d10343b16b38f8f2..7a445666e71f1221ca92cd07bcdea61b29867ded 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -2654,6 +2654,35 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 			ib[idx+4] = upper_32_bits(offset) & 0xff;
 		}
 		break;
+	case PACKET3_MEM_WRITE:
+	{
+		u64 offset;
+
+		if (pkt->count != 3) {
+			DRM_ERROR("bad MEM_WRITE (invalid count)\n");
+			return -EINVAL;
+		}
+		r = evergreen_cs_packet_next_reloc(p, &reloc);
+		if (r) {
+			DRM_ERROR("bad MEM_WRITE (missing reloc)\n");
+			return -EINVAL;
+		}
+		offset = radeon_get_ib_value(p, idx+0);
+		offset += ((u64)(radeon_get_ib_value(p, idx+1) & 0xff)) << 32UL;
+		if (offset & 0x7) {
+			DRM_ERROR("bad MEM_WRITE (address not qword aligned)\n");
+			return -EINVAL;
+		}
+		if ((offset + 8) > radeon_bo_size(reloc->robj)) {
+			DRM_ERROR("bad MEM_WRITE bo too small: 0x%llx, 0x%lx\n",
+				  offset + 8, radeon_bo_size(reloc->robj));
+			return -EINVAL;
+		}
+		offset += reloc->lobj.gpu_offset;
+		ib[idx+0] = offset;
+		ib[idx+1] = upper_32_bits(offset) & 0xff;
+		break;
+	}
 	case PACKET3_COPY_DW:
 		if (pkt->count != 4) {
 			DRM_ERROR("bad COPY_DW (invalid count)\n");
@@ -3287,6 +3316,7 @@ static bool evergreen_vm_reg_valid(u32 reg)
 
 	/* check config regs */
 	switch (reg) {
+	case WAIT_UNTIL:
 	case GRBM_GFX_INDEX:
 	case CP_STRMOUT_CNTL:
 	case CP_COHER_CNTL:
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index 0be768be530c7ceea8e0b4125a3932ed4e696c35..9ea13d07cc55c75fbb7df5ffff7865661a526c5f 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -2294,6 +2294,35 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
 			ib[idx+4] = upper_32_bits(offset) & 0xff;
 		}
 		break;
+	case PACKET3_MEM_WRITE:
+	{
+		u64 offset;
+
+		if (pkt->count != 3) {
+			DRM_ERROR("bad MEM_WRITE (invalid count)\n");
+			return -EINVAL;
+		}
+		r = r600_cs_packet_next_reloc(p, &reloc);
+		if (r) {
+			DRM_ERROR("bad MEM_WRITE (missing reloc)\n");
+			return -EINVAL;
+		}
+		offset = radeon_get_ib_value(p, idx+0);
+		offset += ((u64)(radeon_get_ib_value(p, idx+1) & 0xff)) << 32UL;
+		if (offset & 0x7) {
+			DRM_ERROR("bad MEM_WRITE (address not qword aligned)\n");
+			return -EINVAL;
+		}
+		if ((offset + 8) > radeon_bo_size(reloc->robj)) {
+			DRM_ERROR("bad MEM_WRITE bo too small: 0x%llx, 0x%lx\n",
+				  offset + 8, radeon_bo_size(reloc->robj));
+			return -EINVAL;
+		}
+		offset += reloc->lobj.gpu_offset;
+		ib[idx+0] = offset;
+		ib[idx+1] = upper_32_bits(offset) & 0xff;
+		break;
+	}
 	case PACKET3_COPY_DW:
 		if (pkt->count != 4) {
 			DRM_ERROR("bad COPY_DW (invalid count)\n");
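
Note on the packet format the two checkers above validate: in a PM4 type-3 packet the count field is the body size in dwords minus one, so count == 3 means four payload dwords, which (as the checks imply) are the low 32 bits of the destination address, the high address bits in bits [7:0] of the second dword, and two data dwords. The destination must be qword (8-byte) aligned and the 8-byte store must land inside the relocated buffer object. A minimal standalone sketch of the same arithmetic, with hypothetical stand-ins (toy_bo, check_mem_write) for the parser state and radeon_bo_size():

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for the buffer object the reloc resolves to. */
struct toy_bo { uint64_t size; };

static int check_mem_write(const uint32_t *dw, const struct toy_bo *bo)
{
	/* dw[0] = low 32 bits of the address, dw[1] bits [7:0] = high bits */
	uint64_t offset = dw[0];

	offset += ((uint64_t)(dw[1] & 0xff)) << 32;
	if (offset & 0x7) {
		fprintf(stderr, "address not qword aligned\n");
		return -1;
	}
	/* the packet stores one qword, so offset + 8 must fit in the BO */
	if ((offset + 8) > bo->size) {
		fprintf(stderr, "bo too small: 0x%llx > 0x%llx\n",
			(unsigned long long)(offset + 8),
			(unsigned long long)bo->size);
		return -1;
	}
	return 0;
}

int main(void)
{
	struct toy_bo bo = { .size = 4096 };
	uint32_t ok[2] = { 0x100, 0x00 };	/* aligned, in bounds */
	uint32_t bad[2] = { 0xff9, 0x00 };	/* not qword aligned */

	printf("ok:  %d\n", check_mem_write(ok, &bo));
	printf("bad: %d\n", check_mem_write(bad, &bo));
	return 0;
}
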
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 5dc744d43d128a537078153a5cf94803a4906101..9b9422c4403a40280e1a16af91e79b820fadad58 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -225,12 +225,13 @@ struct radeon_fence {
 int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring);
 int radeon_fence_driver_init(struct radeon_device *rdev);
 void radeon_fence_driver_fini(struct radeon_device *rdev);
+void radeon_fence_driver_force_completion(struct radeon_device *rdev);
 int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, int ring);
 void radeon_fence_process(struct radeon_device *rdev, int ring);
 bool radeon_fence_signaled(struct radeon_fence *fence);
 int radeon_fence_wait(struct radeon_fence *fence, bool interruptible);
 int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring);
-void radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring);
+int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring);
 int radeon_fence_wait_any(struct radeon_device *rdev,
			   struct radeon_fence **fences,
			   bool intr);
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 49b06590001e1a30ca65761cfab2e20d901bc215..cd756262924d3f9e2f05a970613df77ac0d0af51 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1164,6 +1164,7 @@ int radeon_suspend_kms(struct drm_device *dev, pm_message_t state)
 	struct drm_crtc *crtc;
 	struct drm_connector *connector;
 	int i, r;
+	bool force_completion = false;
 
 	if (dev == NULL || dev->dev_private == NULL) {
 		return -ENODEV;
@@ -1206,8 +1207,16 @@ int radeon_suspend_kms(struct drm_device *dev, pm_message_t state)
 
 	mutex_lock(&rdev->ring_lock);
 	/* wait for gpu to finish processing current batch */
-	for (i = 0; i < RADEON_NUM_RINGS; i++)
-		radeon_fence_wait_empty_locked(rdev, i);
+	for (i = 0; i < RADEON_NUM_RINGS; i++) {
+		r = radeon_fence_wait_empty_locked(rdev, i);
+		if (r) {
+			/* delay GPU reset until resume */
+			force_completion = true;
+		}
+	}
+	if (force_completion) {
+		radeon_fence_driver_force_completion(rdev);
+	}
 	mutex_unlock(&rdev->ring_lock);
 
 	radeon_save_bios_scratch_regs(rdev);
@@ -1338,7 +1347,6 @@ int radeon_gpu_reset(struct radeon_device *rdev)
 	}
 
 	radeon_restore_bios_scratch_regs(rdev);
-	drm_helper_resume_force_mode(rdev->ddev);
 
 	if (!r) {
 		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
@@ -1358,11 +1366,14 @@ int radeon_gpu_reset(struct radeon_device *rdev)
 			}
 		}
 	} else {
+		radeon_fence_driver_force_completion(rdev);
 		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
 			kfree(ring_data[i]);
 		}
 	}
 
+	drm_helper_resume_force_mode(rdev->ddev);
+
 	ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
 	if (r) {
 		/* bad news, how to tell it to userspace ? */
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index 9b1a727d3c9e450b926cae872735fd7c05d3456a..ff7593498a7488f2cf39f956cf535db5a3502ad6 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -68,9 +68,10 @@
  *   2.25.0 - eg+: new info request for num SE and num SH
  *   2.26.0 - r600-eg: fix htile size computation
  *   2.27.0 - r600-SI: Add CS ioctl support for async DMA
+ *   2.28.0 - r600-eg: Add MEM_WRITE packet support
  */
 #define KMS_DRIVER_MAJOR	2
-#define KMS_DRIVER_MINOR	27
+#define KMS_DRIVER_MINOR	28
 #define KMS_DRIVER_PATCHLEVEL	0
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
 int radeon_driver_unload_kms(struct drm_device *dev);
diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c
index 410a975a8eec8f7e042a608cfcea0d7c274f1932..34356252567ad23291d9a60c6df1a06ed211c149 100644
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@@ -609,26 +609,20 @@ int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring)
  * Returns 0 if the fences have passed, error for all other cases.
  * Caller must hold ring lock.
  */
-void radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring)
+int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring)
 {
 	uint64_t seq = rdev->fence_drv[ring].sync_seq[ring];
+	int r;
 
-	while(1) {
-		int r;
-		r = radeon_fence_wait_seq(rdev, seq, ring, false, false);
+	r = radeon_fence_wait_seq(rdev, seq, ring, false, false);
+	if (r) {
 		if (r == -EDEADLK) {
-			mutex_unlock(&rdev->ring_lock);
-			r = radeon_gpu_reset(rdev);
-			mutex_lock(&rdev->ring_lock);
-			if (!r)
-				continue;
-		}
-		if (r) {
-			dev_err(rdev->dev, "error waiting for ring to become"
-				" idle (%d)\n", r);
+			return -EDEADLK;
 		}
-		return;
+		dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%d)\n",
+			ring, r);
 	}
+	return 0;
 }
 
 /**
@@ -854,13 +848,17 @@ int radeon_fence_driver_init(struct radeon_device *rdev)
  */
 void radeon_fence_driver_fini(struct radeon_device *rdev)
 {
-	int ring;
+	int ring, r;
 
 	mutex_lock(&rdev->ring_lock);
 	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
 		if (!rdev->fence_drv[ring].initialized)
 			continue;
-		radeon_fence_wait_empty_locked(rdev, ring);
+		r = radeon_fence_wait_empty_locked(rdev, ring);
+		if (r) {
+			/* no need to trigger a GPU reset as we are unloading */
+			radeon_fence_driver_force_completion(rdev);
+		}
 		wake_up_all(&rdev->fence_queue);
 		radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
 		rdev->fence_drv[ring].initialized = false;
@@ -868,6 +866,25 @@ void radeon_fence_driver_fini(struct radeon_device *rdev)
 	mutex_unlock(&rdev->ring_lock);
 }
 
+/**
+ * radeon_fence_driver_force_completion - force all fence waiters to complete
+ *
+ * @rdev: radeon device pointer
+ *
+ * In case of GPU reset failure make sure no process keeps waiting on a fence
+ * that will never complete.
+ */
+void radeon_fence_driver_force_completion(struct radeon_device *rdev)
+{
+	int ring;
+
+	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
+		if (!rdev->fence_drv[ring].initialized)
+			continue;
+		radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring);
+	}
+}
+
 
 /*
  * Fence debugfs
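
Why writing sync_seq[ring] back unblocks waiters: the signaled check compares a fence's sequence number against the last value written to the ring's fence location, and sync_seq[ring] is the highest number ever emitted on that ring, so forcing that value into the fence location makes every outstanding fence read as signaled. A rough standalone model of the mechanism (simplified: the real driver reads through radeon_fence_read() and tracks 64-bit wraparound; ring_fence and force_completion below are hypothetical names):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define NUM_RINGS 3

/* Simplified per-ring fence state. */
struct ring_fence {
	uint64_t last_signaled;	/* what the fence location reads back */
	uint64_t sync_seq;	/* highest sequence number ever emitted */
};

static bool fence_signaled(const struct ring_fence *f, uint64_t seq)
{
	return f->last_signaled >= seq;
}

/* Mirror of what force_completion does: pretend the GPU finished
 * everything that was ever emitted, so no waiter blocks forever. */
static void force_completion(struct ring_fence *rings, int n)
{
	for (int i = 0; i < n; i++)
		rings[i].last_signaled = rings[i].sync_seq;
}

int main(void)
{
	struct ring_fence rings[NUM_RINGS] = {
		{ .last_signaled = 10, .sync_seq = 42 },
	};

	printf("before: %d\n", fence_signaled(&rings[0], 42));	/* 0: would block */
	force_completion(rings, NUM_RINGS);
	printf("after:  %d\n", fence_signaled(&rings[0], 42));	/* 1: completes */
	return 0;
}
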
diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c
index aa14dbb7e4fbac2a81070f3d14abb28a24c3db30..0bfa656aa87d482fead90b86c73aac94e94849de 100644
--- a/drivers/gpu/drm/radeon/radeon_pm.c
+++ b/drivers/gpu/drm/radeon/radeon_pm.c
@@ -234,7 +234,7 @@ static void radeon_set_power_state(struct radeon_device *rdev)
 
 static void radeon_pm_set_clocks(struct radeon_device *rdev)
 {
-	int i;
+	int i, r;
 
 	/* no need to take locks, etc. if nothing's going to change */
 	if ((rdev->pm.requested_clock_mode_index == rdev->pm.current_clock_mode_index) &&
@@ -248,8 +248,17 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev)
 	/* wait for the rings to drain */
 	for (i = 0; i < RADEON_NUM_RINGS; i++) {
 		struct radeon_ring *ring = &rdev->ring[i];
-		if (ring->ready)
-			radeon_fence_wait_empty_locked(rdev, i);
+		if (!ring->ready) {
+			continue;
+		}
+		r = radeon_fence_wait_empty_locked(rdev, i);
+		if (r) {
+			/* needs a GPU reset, but don't reset here */
+			mutex_unlock(&rdev->ring_lock);
+			up_write(&rdev->pm.mclk_lock);
+			mutex_unlock(&rdev->ddev->struct_mutex);
+			return;
+		}
 	}
 
 	radeon_unmap_vram_bos(rdev);
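
Taken together, the pattern across radeon_pm.c, radeon_device.c and radeon_fence.c is that radeon_fence_wait_empty_locked() no longer calls radeon_gpu_reset() behind the caller's back: it reports -EDEADLK, and each call site unwinds its own locks (or, on suspend and teardown, forces fence completion) before any reset happens. A schematic of that contract, with hypothetical lock and reset stand-ins:

#include <errno.h>
#include <stdio.h>

/* Hypothetical stand-ins for ring_lock and the reset entry point. */
static void ring_lock(void)   { puts("take ring_lock"); }
static void ring_unlock(void) { puts("drop ring_lock"); }
static void gpu_reset(void)   { puts("reset GPU"); }

/* New contract: report the hang, never reset while locks are held. */
static int wait_empty_locked(void)
{
	return -EDEADLK;	/* pretend the ring timed out */
}

static void pm_set_clocks(void)
{
	ring_lock();
	if (wait_empty_locked()) {
		/* drop everything we hold, then bail out; resetting
		 * here, under ring_lock, is what used to deadlock */
		ring_unlock();
		return;
	}
	/* ... reclock and release locks on the success path ... */
	ring_unlock();
}

int main(void)
{
	pm_set_clocks();
	gpu_reset();	/* a later, lock-free path performs the reset */
	return 0;
}
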