提交 7780f503 编写于 作者: D Dennis Li 提交者: Alex Deucher

drm/amdgpu: add function to clear MMEA error status for aldebaran

For aldebaran, hardware will not clear error status automatically when
reading error status register, insteadly driver should set clear bit of
the error status register explicitly to clear error status.
Signed-off-by: NDennis Li <Dennis.Li@amd.com>
Reviewed-by: NHawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: NAlex Deucher <alexander.deucher@amd.com>
上级 4f64f1c8
...@@ -28,6 +28,7 @@ struct amdgpu_mmhub_ras_funcs { ...@@ -28,6 +28,7 @@ struct amdgpu_mmhub_ras_funcs {
void *ras_error_status); void *ras_error_status);
void (*query_ras_error_status)(struct amdgpu_device *adev); void (*query_ras_error_status)(struct amdgpu_device *adev);
void (*reset_ras_error_count)(struct amdgpu_device *adev); void (*reset_ras_error_count)(struct amdgpu_device *adev);
void (*reset_ras_error_status)(struct amdgpu_device *adev);
}; };
struct amdgpu_mmhub_funcs { struct amdgpu_mmhub_funcs {
......
...@@ -938,6 +938,10 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev, ...@@ -938,6 +938,10 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
if (adev->mmhub.ras_funcs && if (adev->mmhub.ras_funcs &&
adev->mmhub.ras_funcs->reset_ras_error_count) adev->mmhub.ras_funcs->reset_ras_error_count)
adev->mmhub.ras_funcs->reset_ras_error_count(adev); adev->mmhub.ras_funcs->reset_ras_error_count(adev);
if (adev->mmhub.ras_funcs &&
adev->mmhub.ras_funcs->reset_ras_error_status)
adev->mmhub.ras_funcs->reset_ras_error_status(adev);
break; break;
case AMDGPU_RAS_BLOCK__SDMA: case AMDGPU_RAS_BLOCK__SDMA:
if (adev->sdma.funcs->reset_ras_error_count) if (adev->sdma.funcs->reset_ras_error_count)
......
...@@ -1315,12 +1315,31 @@ static void mmhub_v1_7_query_ras_error_status(struct amdgpu_device *adev) ...@@ -1315,12 +1315,31 @@ static void mmhub_v1_7_query_ras_error_status(struct amdgpu_device *adev)
} }
} }
static void mmhub_v1_7_reset_ras_error_status(struct amdgpu_device *adev)
{
int i;
uint32_t reg_value;
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB))
return;
for (i = 0; i < ARRAY_SIZE(mmhub_v1_7_ea_err_status_regs); i++) {
reg_value = RREG32(SOC15_REG_ENTRY_OFFSET(
mmhub_v1_7_ea_err_status_regs[i]));
reg_value = REG_SET_FIELD(reg_value, MMEA0_ERR_STATUS,
CLEAR_ERROR_STATUS, 0x01);
WREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_7_ea_err_status_regs[i]),
reg_value);
}
}
const struct amdgpu_mmhub_ras_funcs mmhub_v1_7_ras_funcs = { const struct amdgpu_mmhub_ras_funcs mmhub_v1_7_ras_funcs = {
.ras_late_init = amdgpu_mmhub_ras_late_init, .ras_late_init = amdgpu_mmhub_ras_late_init,
.ras_fini = amdgpu_mmhub_ras_fini, .ras_fini = amdgpu_mmhub_ras_fini,
.query_ras_error_count = mmhub_v1_7_query_ras_error_count, .query_ras_error_count = mmhub_v1_7_query_ras_error_count,
.reset_ras_error_count = mmhub_v1_7_reset_ras_error_count, .reset_ras_error_count = mmhub_v1_7_reset_ras_error_count,
.query_ras_error_status = mmhub_v1_7_query_ras_error_status, .query_ras_error_status = mmhub_v1_7_query_ras_error_status,
.reset_ras_error_status = mmhub_v1_7_reset_ras_error_status,
}; };
const struct amdgpu_mmhub_funcs mmhub_v1_7_funcs = { const struct amdgpu_mmhub_funcs mmhub_v1_7_funcs = {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册