提交 34cc4fd9 编写于 作者: T Tao Zhou 提交者: Alex Deucher

drm/amdgpu: move umc ras irq functions to umc block

Move the umc ras irq functions from gmc v9 to the generic umc block.
These functions are umc-related and can be shared among all
generations of umc.
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Guchun Chen <guchun.chen@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
上级 f5f06e21
......@@ -21,7 +21,6 @@
*
*/
#include "amdgpu.h"
#include "amdgpu_ras.h"
int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, void *ras_ih_info)
......@@ -75,3 +74,67 @@ int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, void *ras_ih_info)
adev->gmc.umc_ras_if = NULL;
return r;
}
int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
void *ras_error_status,
struct amdgpu_iv_entry *entry)
{
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
return AMDGPU_RAS_SUCCESS;
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
if (adev->umc.funcs &&
adev->umc.funcs->query_ras_error_count)
adev->umc.funcs->query_ras_error_count(adev, ras_error_status);
if (adev->umc.funcs &&
adev->umc.funcs->query_ras_error_address &&
adev->umc.max_ras_err_cnt_per_query) {
err_data->err_addr =
kcalloc(adev->umc.max_ras_err_cnt_per_query,
sizeof(struct eeprom_table_record), GFP_KERNEL);
/* still call query_ras_error_address to clear error status
* even NOMEM error is encountered
*/
if(!err_data->err_addr)
DRM_WARN("Failed to alloc memory for umc error address record!\n");
/* umc query_ras_error_address is also responsible for clearing
* error status
*/
adev->umc.funcs->query_ras_error_address(adev, ras_error_status);
}
/* only uncorrectable error needs gpu reset */
if (err_data->ue_count) {
if (err_data->err_addr_cnt &&
amdgpu_ras_add_bad_pages(adev, err_data->err_addr,
err_data->err_addr_cnt))
DRM_WARN("Failed to add ras bad page!\n");
amdgpu_ras_reset_gpu(adev, 0);
}
kfree(err_data->err_addr);
return AMDGPU_RAS_SUCCESS;
}
int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
struct ras_common_if *ras_if = adev->gmc.umc_ras_if;
struct ras_dispatch_if ih_data = {
.entry = entry,
};
if (!ras_if)
return 0;
ih_data.head = *ras_if;
amdgpu_ras_interrupt_dispatch(adev, &ih_data);
return 0;
}
......@@ -82,4 +82,10 @@ struct amdgpu_umc {
};
int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, void *ras_ih_info);
/* RAS data callback for UMC errors; ras_error_status is treated as a
 * struct ras_err_data by the implementation.
 */
int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
void *ras_error_status,
struct amdgpu_iv_entry *entry);
/* ECC interrupt handler that dispatches the entry to the RAS core when a
 * UMC RAS interface is registered.
 */
int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry);
#endif
......@@ -245,70 +245,6 @@ static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev,
return 0;
}
/* gmc v9 copy of the UMC RAS data callback. Per the commit message this
 * function is moved to the generic umc block, where it is named
 * amdgpu_umc_process_ras_data_cb.
 *
 * ras_error_status is used as a struct ras_err_data. The function always
 * returns AMDGPU_RAS_SUCCESS; entry is not referenced.
 */
static int gmc_v9_0_process_ras_data_cb(struct amdgpu_device *adev,
void *ras_error_status,
struct amdgpu_iv_entry *entry)
{
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
/* nothing to do here when RAS is reported as supported for the GFX block */
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
return AMDGPU_RAS_SUCCESS;
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
/* both UMC hooks are optional; each is called only when present */
if (adev->umc.funcs &&
adev->umc.funcs->query_ras_error_count)
adev->umc.funcs->query_ras_error_count(adev, ras_error_status);
if (adev->umc.funcs &&
adev->umc.funcs->query_ras_error_address &&
adev->umc.max_ras_err_cnt_per_query) {
err_data->err_addr =
kcalloc(adev->umc.max_ras_err_cnt_per_query,
sizeof(struct eeprom_table_record), GFP_KERNEL);
/* still call query_ras_error_address to clear the error status
 * even if the allocation failed
 */
if(!err_data->err_addr)
DRM_WARN("Failed to alloc memory for umc error address record!\n");
/* umc query_ras_error_address is also responsible for clearing
 * error status
 */
adev->umc.funcs->query_ras_error_address(adev, err_data);
}
/* only uncorrectable error needs gpu reset */
if (err_data->ue_count) {
if (err_data->err_addr_cnt &&
amdgpu_ras_add_bad_pages(adev, err_data->err_addr,
err_data->err_addr_cnt))
DRM_WARN("Failed to add ras bad page!\n");
amdgpu_ras_reset_gpu(adev, 0);
}
/* the address buffer lives only for the duration of this call */
kfree(err_data->err_addr);
return AMDGPU_RAS_SUCCESS;
}
/* gmc v9 copy of the UMC ECC interrupt handler. Per the commit message
 * this function is moved to the generic umc block, where it is named
 * amdgpu_umc_process_ecc_irq.
 *
 * Always returns 0; source is not referenced.
 */
static int gmc_v9_0_process_ecc_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
struct ras_common_if *ras_if = adev->gmc.umc_ras_if;
struct ras_dispatch_if ih_data = {
.entry = entry,
};
/* no UMC RAS interface registered: nothing to dispatch */
if (!ras_if)
return 0;
/* the dispatcher receives a copy of the interface header plus the entry */
ih_data.head = *ras_if;
amdgpu_ras_interrupt_dispatch(adev, &ih_data);
return 0;
}
static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
struct amdgpu_irq_src *src,
unsigned type,
......@@ -449,7 +385,7 @@ static const struct amdgpu_irq_src_funcs gmc_v9_0_irq_funcs = {
/* ECC interrupt source callbacks for gmc v9. */
static const struct amdgpu_irq_src_funcs gmc_v9_0_ecc_funcs = {
.set = gmc_v9_0_ecc_interrupt_state,
/* NOTE(review): the two .process lines below appear to be the removed and
 * added sides of this diff hunk with the -/+ markers lost in extraction;
 * the gmc v9 handler is replaced by the shared umc one — confirm against
 * the original commit.
 */
.process = gmc_v9_0_process_ecc_irq,
.process = amdgpu_umc_process_ecc_irq,
};
static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev)
......@@ -805,7 +741,7 @@ static int gmc_v9_0_ecc_late_init(void *handle)
int r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct ras_ih_if umc_ih_info = {
.cb = gmc_v9_0_process_ras_data_cb,
.cb = amdgpu_umc_process_ras_data_cb,
};
if (adev->umc.funcs && adev->umc.funcs->ras_late_init) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册