diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 2957404bd44a5bf5c32e13ff4b902877d06c1e49..c40c1a16e72e2acbcc9dbe24c7bb405be1e39c57 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -912,7 +912,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, /* each GFX command submit allows 0 or 1 IB preemptible for CE & DE */ if (ce_preempt > 1 || de_preempt > 1) - BUG(); + return -EINVAL; } r = amdgpu_cs_get_ring(adev, chunk_ib->ip_type, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index c4857083d834e899f3f3d701fa43b65555c9283b..6b8bb1b070cca3078e76c48b3f08be4bfd6021ff 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -161,9 +161,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, return r; } - if (ring->funcs->init_cond_exec) - patch_offset = amdgpu_ring_init_cond_exec(ring); - if (vm) { r = amdgpu_vm_flush(ring, job); if (r) { @@ -172,7 +169,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, } } - if (ring->funcs->emit_hdp_flush + if (ring->funcs->init_cond_exec) + patch_offset = amdgpu_ring_init_cond_exec(ring); + + if (ring->funcs->emit_hdp_flush #ifdef CONFIG_X86_64 && !(adev->flags & AMD_IS_APU) #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 10e8232d6cacb342d983fddab0fa98faa32abc4e..72bef223a080b95f18a67c357f4ab13dcd5994f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -577,42 +577,59 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job) id->oa_size != job->oa_size); int r; - if (ring->funcs->emit_pipeline_sync && ( - job->vm_needs_flush || gds_switch_needed || - amdgpu_vm_ring_has_compute_vm_bug(ring))) - amdgpu_ring_emit_pipeline_sync(ring); + if (job->vm_needs_flush || gds_switch_needed || + amdgpu_vm_is_gpu_reset(adev, id) || + amdgpu_vm_ring_has_compute_vm_bug(ring)) { + unsigned patch_offset = 0; - if (ring->funcs->emit_vm_flush && (job->vm_needs_flush || - amdgpu_vm_is_gpu_reset(adev, id))) { - struct dma_fence *fence; - u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr); + if (ring->funcs->init_cond_exec) + patch_offset = amdgpu_ring_init_cond_exec(ring); - trace_amdgpu_vm_flush(pd_addr, ring->idx, job->vm_id); - amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr); + if (ring->funcs->emit_pipeline_sync && + (job->vm_needs_flush || gds_switch_needed || + amdgpu_vm_ring_has_compute_vm_bug(ring))) + amdgpu_ring_emit_pipeline_sync(ring); - r = amdgpu_fence_emit(ring, &fence); - if (r) - return r; + if (ring->funcs->emit_vm_flush && (job->vm_needs_flush || + amdgpu_vm_is_gpu_reset(adev, id))) { + struct dma_fence *fence; + u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr); - mutex_lock(&adev->vm_manager.lock); - dma_fence_put(id->last_flush); - id->last_flush = fence; - mutex_unlock(&adev->vm_manager.lock); - } + trace_amdgpu_vm_flush(pd_addr, ring->idx, job->vm_id); + amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr); - if (gds_switch_needed) { - id->gds_base = job->gds_base; - id->gds_size = job->gds_size; - id->gws_base = job->gws_base; - id->gws_size = job->gws_size; - id->oa_base = job->oa_base; - id->oa_size = job->oa_size; - amdgpu_ring_emit_gds_switch(ring, job->vm_id, - job->gds_base, job->gds_size, - job->gws_base, job->gws_size, - job->oa_base, job->oa_size); - } + r = amdgpu_fence_emit(ring, &fence); + if (r) + return r; + mutex_lock(&adev->vm_manager.lock); + dma_fence_put(id->last_flush); + id->last_flush = fence; + mutex_unlock(&adev->vm_manager.lock); + } + + if (gds_switch_needed) { + id->gds_base = job->gds_base; + id->gds_size = job->gds_size; + id->gws_base = job->gws_base; + id->gws_size = job->gws_size; + id->oa_base = job->oa_base; + id->oa_size = job->oa_size; + amdgpu_ring_emit_gds_switch(ring, job->vm_id, + job->gds_base, job->gds_size, + job->gws_base, job->gws_size, + job->oa_base, job->oa_size); + } + + if (ring->funcs->patch_cond_exec) + amdgpu_ring_patch_cond_exec(ring, patch_offset); + + /* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */ + if (ring->funcs->emit_switch_buffer) { + amdgpu_ring_emit_switch_buffer(ring); + amdgpu_ring_emit_switch_buffer(ring); + } + } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index c59bb38c091f438c7e6478fad91f35bd54605300..e0fa0d30e162a0fbe4fa24821b4b0765920ba5a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -6675,8 +6675,6 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring, /* sync PFP to ME, otherwise we might get invalid PFP reads */ amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); amdgpu_ring_write(ring, 0x0); - /* GFX8 emits 128 dw nop to prevent CE access VM before vm_flush finish */ - amdgpu_ring_insert_nop(ring, 128); } } @@ -7078,15 +7076,24 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { .get_rptr = gfx_v8_0_ring_get_rptr, .get_wptr = gfx_v8_0_ring_get_wptr_gfx, .set_wptr = gfx_v8_0_ring_set_wptr_gfx, - .emit_frame_size = - 20 + /* gfx_v8_0_ring_emit_gds_switch */ - 7 + /* gfx_v8_0_ring_emit_hdp_flush */ - 5 + /* gfx_v8_0_ring_emit_hdp_invalidate */ - 6 + 6 + 6 +/* gfx_v8_0_ring_emit_fence_gfx x3 for user fence, vm fence */ - 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ - 128 + 19 + /* gfx_v8_0_ring_emit_vm_flush */ - 2 + /* gfx_v8_ring_emit_sb */ - 3 + 4 + 29, /* gfx_v8_ring_emit_cntxcntl including vgt flush/meta-data */ + .emit_frame_size = /* maximum 215dw if count 16 IBs in */ + 5 + /* COND_EXEC */ + 7 + /* PIPELINE_SYNC */ + 19 + /* VM_FLUSH */ + 8 + /* FENCE for VM_FLUSH */ + 20 + /* GDS switch */ + 4 + /* double SWITCH_BUFFER, + the first COND_EXEC jump to the place just + prior to this double SWITCH_BUFFER */ + 5 + /* COND_EXEC */ + 7 + /* HDP_flush */ + 4 + /* VGT_flush */ + 14 + /* CE_META */ + 31 + /* DE_META */ + 3 + /* CNTX_CTRL */ + 5 + /* HDP_INVL */ + 8 + 8 + /* FENCE x2 */ + 2, /* SWITCH_BUFFER */ .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_gfx */ .emit_ib = gfx_v8_0_ring_emit_ib_gfx, .emit_fence = gfx_v8_0_ring_emit_fence_gfx, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index f124f6d3b86916ac77c46bc7a842c19069f7891d..7666add21519d592fea972a80c47b16e9850903e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3186,8 +3186,6 @@ static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring, /* sync PFP to ME, otherwise we might get invalid PFP reads */ amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0)); amdgpu_ring_write(ring, 0x0); - /* Emits 128 dw nop to prevent CE access VM before vm_flush finish */ - amdgpu_ring_insert_nop(ring, 128); } } @@ -3682,15 +3680,24 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .get_rptr = gfx_v9_0_ring_get_rptr_gfx, .get_wptr = gfx_v9_0_ring_get_wptr_gfx, .set_wptr = gfx_v9_0_ring_set_wptr_gfx, - .emit_frame_size = - 20 + /* gfx_v9_0_ring_emit_gds_switch */ - 7 + /* gfx_v9_0_ring_emit_hdp_flush */ - 5 + /* gfx_v9_0_ring_emit_hdp_invalidate */ - 8 + 8 + 8 +/* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */ - 7 + /* gfx_v9_0_ring_emit_pipeline_sync */ - 128 + 66 + /* gfx_v9_0_ring_emit_vm_flush */ - 2 + /* gfx_v9_ring_emit_sb */ - 3, /* gfx_v9_ring_emit_cntxcntl */ + .emit_frame_size = /* totally 242 maximum if 16 IBs */ + 5 + /* COND_EXEC */ + 7 + /* PIPELINE_SYNC */ + 46 + /* VM_FLUSH */ + 8 + /* FENCE for VM_FLUSH */ + 20 + /* GDS switch */ + 4 + /* double SWITCH_BUFFER, + the first COND_EXEC jump to the place just + prior to this double SWITCH_BUFFER */ + 5 + /* COND_EXEC */ + 7 + /* HDP_flush */ + 4 + /* VGT_flush */ + 14 + /* CE_META */ + 31 + /* DE_META */ + 3 + /* CNTX_CTRL */ + 5 + /* HDP_INVL */ + 8 + 8 + /* FENCE x2 */ + 2, /* SWITCH_BUFFER */ .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_gfx */ .emit_ib = gfx_v9_0_ring_emit_ib_gfx, .emit_fence = gfx_v9_0_ring_emit_fence,