Commit a667386c authored by Felix Kuehling, committed by Alex Deucher

drm/amdgpu: Make SDMA phase quantum configurable

Set a configurable SDMA phase quantum when enabling SDMA context
switching. The default value significantly reduces SDMA latency
in page table updates when user-mode SDMA queues have concurrent
activity, compared to the initial HW setting.
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Andres Rodriguez <andres.rodriguez@amd.com>
Reviewed-by: Shaoyun Liu <shaoyun.liu@amd.com>
Acked-by: Chunming Zhou <david1.zhou@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Parent 763dbbfa
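The per-ASIC changes below all encode the requested quantum, given in units of 1K GPU clock cycles, into the VALUE/UNIT fields of the SDMA PHASE_QUANTUM registers: VALUE is halved (rounding up) while UNIT counts the power-of-two scale, and the pair is clamped with a one-time warning if the request still does not fit. The following stand-alone sketch reproduces that encoding; the mask and shift constants here are illustrative placeholders, not the actual SDMA0_PHASE0_QUANTUM register layout, which comes from the ASIC register headers.

#include <stdio.h>

/* Illustrative field layout only -- the real SDMA0_PHASE0_QUANTUM masks and
 * shifts are defined in the ASIC register headers and may differ. */
#define QUANTUM_VALUE_SHIFT	0
#define QUANTUM_VALUE_MASK	(0xffu << QUANTUM_VALUE_SHIFT)
#define QUANTUM_UNIT_SHIFT	12
#define QUANTUM_UNIT_MASK	(0xfu << QUANTUM_UNIT_SHIFT)

/* Encode a phase quantum (in units of 1K GPU clock cycles) the way the patch
 * does: halve VALUE (rounding up) while UNIT counts the power-of-two scale,
 * then clamp to the largest representable quantum if UNIT overflows. */
static unsigned int encode_phase_quantum(unsigned int quantum_1k)
{
	unsigned int value = quantum_1k;
	unsigned int unit = 0;

	while (value > (QUANTUM_VALUE_MASK >> QUANTUM_VALUE_SHIFT)) {
		value = (value + 1) >> 1;	/* scale down, rounding up */
		unit++;
	}
	if (unit > (QUANTUM_UNIT_MASK >> QUANTUM_UNIT_SHIFT)) {
		value = QUANTUM_VALUE_MASK >> QUANTUM_VALUE_SHIFT;
		unit = QUANTUM_UNIT_MASK >> QUANTUM_UNIT_SHIFT;
		fprintf(stderr, "clamping phase quantum to %uK cycles\n",
			value << unit);
	}
	return (value << QUANTUM_VALUE_SHIFT) | (unit << QUANTUM_UNIT_SHIFT);
}

int main(void)
{
	/* The default of 32 fits directly: VALUE = 32, UNIT = 0. */
	printf("32   -> 0x%08x\n", encode_phase_quantum(32));
	/* 1000 overflows an 8-bit VALUE and is approximated as 250 * 2^2. */
	printf("1000 -> 0x%08x\n", encode_phase_quantum(1000));
	return 0;
}

Because each halving rounds up, the programmed quantum is never shorter than the one requested; oversized requests only lose precision, not duration.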
@@ -107,6 +107,7 @@ extern unsigned amdgpu_pcie_gen_cap;
 extern unsigned amdgpu_pcie_lane_cap;
 extern unsigned amdgpu_cg_mask;
 extern unsigned amdgpu_pg_mask;
+extern unsigned amdgpu_sdma_phase_quantum;
 extern char *amdgpu_disable_cu;
 extern char *amdgpu_virtual_display;
 extern unsigned amdgpu_pp_feature_mask;
......
@@ -108,6 +108,7 @@ unsigned amdgpu_pcie_gen_cap = 0;
 unsigned amdgpu_pcie_lane_cap = 0;
 unsigned amdgpu_cg_mask = 0xffffffff;
 unsigned amdgpu_pg_mask = 0xffffffff;
+unsigned amdgpu_sdma_phase_quantum = 32;
 char *amdgpu_disable_cu = NULL;
 char *amdgpu_virtual_display = NULL;
 unsigned amdgpu_pp_feature_mask = 0xffffffff;
@@ -227,6 +228,9 @@ module_param_named(cg_mask, amdgpu_cg_mask, uint, 0444);
 MODULE_PARM_DESC(pg_mask, "Powergating flags mask (0 = disable power gating)");
 module_param_named(pg_mask, amdgpu_pg_mask, uint, 0444);
+MODULE_PARM_DESC(sdma_phase_quantum, "SDMA context switch phase quantum (x 1K GPU clock cycles, 0 = no change (default 32))");
+module_param_named(sdma_phase_quantum, amdgpu_sdma_phase_quantum, uint, 0444);
 MODULE_PARM_DESC(disable_cu, "Disable CUs (se.sh.cu,...)");
 module_param_named(disable_cu, amdgpu_disable_cu, charp, 0444);
......
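Since the new knob is registered through module_param_named() with 0444 permissions, it should be settable only at load time: presumably as a module option (for example "modprobe amdgpu sdma_phase_quantum=8") or, with a built-in driver, as amdgpu.sdma_phase_quantum=8 on the kernel command line. The value is in units of 1K GPU clock cycles; 0 leaves the hardware's default phase quantum untouched, and the driver default is 32.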
@@ -351,14 +351,44 @@ static void cik_sdma_rlc_stop(struct amdgpu_device *adev)
  */
 static void cik_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
 {
-	u32 f32_cntl;
+	u32 f32_cntl, phase_quantum = 0;
 	int i;
 
+	if (amdgpu_sdma_phase_quantum) {
+		unsigned value = amdgpu_sdma_phase_quantum;
+		unsigned unit = 0;
+
+		while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+				SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) {
+			value = (value + 1) >> 1;
+			unit++;
+		}
+		if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+			    SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) {
+			value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+				 SDMA0_PHASE0_QUANTUM__VALUE__SHIFT);
+			unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+				SDMA0_PHASE0_QUANTUM__UNIT__SHIFT);
+			WARN_ONCE(1,
+			"clamping sdma_phase_quantum to %uK clock cycles\n",
+				  value << unit);
+		}
+		phase_quantum =
+			value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT |
+			unit  << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT;
+	}
+
 	for (i = 0; i < adev->sdma.num_instances; i++) {
 		f32_cntl = RREG32(mmSDMA0_CNTL + sdma_offsets[i]);
 		if (enable) {
 			f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
 					AUTO_CTXSW_ENABLE, 1);
+			if (amdgpu_sdma_phase_quantum) {
+				WREG32(mmSDMA0_PHASE0_QUANTUM + sdma_offsets[i],
+				       phase_quantum);
+				WREG32(mmSDMA0_PHASE1_QUANTUM + sdma_offsets[i],
+				       phase_quantum);
+			}
 		} else {
 			f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
 					AUTO_CTXSW_ENABLE, 0);
......
@@ -551,9 +551,33 @@ static void sdma_v3_0_rlc_stop(struct amdgpu_device *adev)
  */
 static void sdma_v3_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
 {
-	u32 f32_cntl;
+	u32 f32_cntl, phase_quantum = 0;
 	int i;
 
+	if (amdgpu_sdma_phase_quantum) {
+		unsigned value = amdgpu_sdma_phase_quantum;
+		unsigned unit = 0;
+
+		while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+				SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) {
+			value = (value + 1) >> 1;
+			unit++;
+		}
+		if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+			    SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) {
+			value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+				 SDMA0_PHASE0_QUANTUM__VALUE__SHIFT);
+			unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+				SDMA0_PHASE0_QUANTUM__UNIT__SHIFT);
+			WARN_ONCE(1,
+			"clamping sdma_phase_quantum to %uK clock cycles\n",
+				  value << unit);
+		}
+		phase_quantum =
+			value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT |
+			unit  << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT;
+	}
+
 	for (i = 0; i < adev->sdma.num_instances; i++) {
 		f32_cntl = RREG32(mmSDMA0_CNTL + sdma_offsets[i]);
 		if (enable) {
@@ -561,6 +585,12 @@ static void sdma_v3_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
 					AUTO_CTXSW_ENABLE, 1);
 			f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
 					ATC_L1_ENABLE, 1);
+			if (amdgpu_sdma_phase_quantum) {
+				WREG32(mmSDMA0_PHASE0_QUANTUM + sdma_offsets[i],
+				       phase_quantum);
+				WREG32(mmSDMA0_PHASE1_QUANTUM + sdma_offsets[i],
+				       phase_quantum);
+			}
 		} else {
 			f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
 					AUTO_CTXSW_ENABLE, 0);
......
@@ -493,13 +493,45 @@ static void sdma_v4_0_rlc_stop(struct amdgpu_device *adev)
  */
 static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
 {
-	u32 f32_cntl;
+	u32 f32_cntl, phase_quantum = 0;
 	int i;
 
+	if (amdgpu_sdma_phase_quantum) {
+		unsigned value = amdgpu_sdma_phase_quantum;
+		unsigned unit = 0;
+
+		while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+				SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) {
+			value = (value + 1) >> 1;
+			unit++;
+		}
+		if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+			    SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) {
+			value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
+				 SDMA0_PHASE0_QUANTUM__VALUE__SHIFT);
+			unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
+				SDMA0_PHASE0_QUANTUM__UNIT__SHIFT);
+			WARN_ONCE(1,
+			"clamping sdma_phase_quantum to %uK clock cycles\n",
+				  value << unit);
+		}
+		phase_quantum =
+			value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT |
+			unit  << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT;
+	}
+
 	for (i = 0; i < adev->sdma.num_instances; i++) {
 		f32_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_CNTL));
 		f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
 				AUTO_CTXSW_ENABLE, enable ? 1 : 0);
+		if (enable && amdgpu_sdma_phase_quantum) {
+			WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_PHASE0_QUANTUM),
+			       phase_quantum);
+			WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_PHASE1_QUANTUM),
+			       phase_quantum);
+			WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_PHASE2_QUANTUM),
+			       phase_quantum);
+		}
 		WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_CNTL), f32_cntl);
 	}
......