提交 761c77c1 编写于 作者: A Andres Rodriguez 提交者: Alex Deucher

drm/amdgpu: add high priority compute support for gfx9

We follow the same approach as gfx8. The only changes are register
access macros.

Tested on vega10. The execution latency results fall within the expected
ranges from the polaris10 data.
Reviewed-by: NAlex Deucher <alexander.deucher@amd.com>
Signed-off-by: NAndres Rodriguez <andresx7@gmail.com>
Signed-off-by: NAlex Deucher <alexander.deucher@amd.com>
上级 10cd19c8
......@@ -3735,6 +3735,105 @@ static u64 gfx_v9_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
return wptr;
}
static void gfx_v9_0_ring_set_pipe_percent(struct amdgpu_ring *ring,
bool acquire)
{
struct amdgpu_device *adev = ring->adev;
int pipe_num, tmp, reg;
int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1;
pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe;
/* first me only has 2 entries, GFX and HP3D */
if (ring->me > 0)
pipe_num -= 2;
reg = SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX) + pipe_num;
tmp = RREG32(reg);
tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent);
WREG32(reg, tmp);
}
static void gfx_v9_0_pipe_reserve_resources(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
bool acquire)
{
int i, pipe;
bool reserve;
struct amdgpu_ring *iring;
mutex_lock(&adev->gfx.pipe_reserve_mutex);
pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0);
if (acquire)
set_bit(pipe, adev->gfx.pipe_reserve_bitmap);
else
clear_bit(pipe, adev->gfx.pipe_reserve_bitmap);
if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) {
/* Clear all reservations - everyone reacquires all resources */
for (i = 0; i < adev->gfx.num_gfx_rings; ++i)
gfx_v9_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i],
true);
for (i = 0; i < adev->gfx.num_compute_rings; ++i)
gfx_v9_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i],
true);
} else {
/* Lower all pipes without a current reservation */
for (i = 0; i < adev->gfx.num_gfx_rings; ++i) {
iring = &adev->gfx.gfx_ring[i];
pipe = amdgpu_gfx_queue_to_bit(adev,
iring->me,
iring->pipe,
0);
reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
gfx_v9_0_ring_set_pipe_percent(iring, reserve);
}
for (i = 0; i < adev->gfx.num_compute_rings; ++i) {
iring = &adev->gfx.compute_ring[i];
pipe = amdgpu_gfx_queue_to_bit(adev,
iring->me,
iring->pipe,
0);
reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap);
gfx_v9_0_ring_set_pipe_percent(iring, reserve);
}
}
mutex_unlock(&adev->gfx.pipe_reserve_mutex);
}
static void gfx_v9_0_hqd_set_priority(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
bool acquire)
{
uint32_t pipe_priority = acquire ? 0x2 : 0x0;
uint32_t queue_priority = acquire ? 0xf : 0x0;
mutex_lock(&adev->srbm_mutex);
soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
WREG32_SOC15(GC, 0, mmCP_HQD_PIPE_PRIORITY, pipe_priority);
WREG32_SOC15(GC, 0, mmCP_HQD_QUEUE_PRIORITY, queue_priority);
soc15_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
}
static void gfx_v9_0_ring_set_priority_compute(struct amdgpu_ring *ring,
enum drm_sched_priority priority)
{
struct amdgpu_device *adev = ring->adev;
bool acquire = priority == DRM_SCHED_PRIORITY_HIGH_HW;
if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
return;
gfx_v9_0_hqd_set_priority(adev, ring, acquire);
gfx_v9_0_pipe_reserve_resources(adev, ring, acquire);
}
static void gfx_v9_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
......@@ -4261,6 +4360,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
.test_ib = gfx_v9_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.set_priority = gfx_v9_0_ring_set_priority_compute,
};
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册