提交 83b0582c 编写于 作者: D Dennis Li 提交者: Alex Deucher

drm/amdgpu: support gfx ras error injection and err_cnt query

check gfx error count in both ras querry function and
ras interrupt handler.

gfx ras is still disabled by default due to known stability
issue found in gpu reset.
Signed-off-by: NDennis Li <Dennis.Li@amd.com>
Reviewed-by: NTao Zhou <tao.zhou1@amd.com>
Reviewed-by: NHawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: NAlex Deucher <alexander.deucher@amd.com>
上级 2c960ea0
......@@ -602,6 +602,10 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev,
if (adev->umc.funcs->query_ras_error_count)
adev->umc.funcs->query_ras_error_count(adev, &err_data);
break;
case AMDGPU_RAS_BLOCK__GFX:
if (adev->gfx.funcs->query_ras_error_count)
adev->gfx.funcs->query_ras_error_count(adev, &err_data);
break;
default:
break;
}
......@@ -639,13 +643,22 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
if (!obj)
return -EINVAL;
if (block_info.block_id != TA_RAS_BLOCK__UMC) {
switch (info->head.block) {
case AMDGPU_RAS_BLOCK__GFX:
if (adev->gfx.funcs->ras_error_inject)
ret = adev->gfx.funcs->ras_error_inject(adev, info);
else
ret = -EINVAL;
break;
case AMDGPU_RAS_BLOCK__UMC:
ret = psp_ras_trigger_error(&adev->psp, &block_info);
break;
default:
DRM_INFO("%s error injection is not supported yet\n",
ras_block_str(info->head.block));
return -EINVAL;
ret = -EINVAL;
}
ret = psp_ras_trigger_error(&adev->psp, &block_info);
if (ret)
DRM_ERROR("RAS ERROR: inject %s error failed ret %d\n",
ras_block_str(info->head.block),
......
......@@ -5611,6 +5611,8 @@ static int gfx_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
{
/* TODO ue will trigger an interrupt. */
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
if (adev->gfx.funcs->query_ras_error_count)
adev->gfx.funcs->query_ras_error_count(adev, err_data);
amdgpu_ras_reset_gpu(adev, 0);
return AMDGPU_RAS_UE;
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册