Commit bdc4292b authored by yipechai, committed by Alex Deucher

drm/amdgpu: Modify sdma block to fit for the unified ras block data and ops

1. Modify the sdma block to fit the unified ras block data and ops.
2. Rename struct amdgpu_sdma_ras_funcs to struct amdgpu_sdma_ras, and drop the _funcs suffix from the corresponding variable names.
3. Remove the const qualifier from the sdma ras variables so that the sdma ras block can be inserted into the amdgpu device ras block linked list.
4. Invoke amdgpu_ras_register_ras_block to register the sdma ras block in the amdgpu device ras block linked list.
5. Remove the sdma code in amdgpu_ras.c made redundant by the unified ras block.
6. Fill the unified ras block fields .name, .block, .ras_late_init and .ras_fini for all sdma versions. If the selected sdma version defines its own .ras_late_init and .ras_fini, those take effect; otherwise they default to amdgpu_sdma_ras_late_init and amdgpu_sdma_ras_fini. (A sketch of the resulting data layout follows the sign-off block below.)

v2: squash in warning fix (Alex)
Signed-off-by: yipechai <YiPeng.Chai@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: John Clements <john.clements@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Parent: efe17d5a
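
Before the diff, it helps to see the shape of the unified RAS block object this series converges on. The following is a minimal sketch only: the field list is inferred from how the diff below uses the object (name, block, hw_ops, ras_late_init, ras_fini); the authoritative definitions live in amdgpu_ras.h and carry more members.

/* Sketch of the unified RAS block interfaces, inferred from their
 * uses in this commit; not the verbatim amdgpu_ras.h definitions.
 */
struct amdgpu_ras_block_hw_ops {
	void (*query_ras_error_count)(struct amdgpu_device *adev,
				      void *ras_error_status);
	void (*reset_ras_error_count)(struct amdgpu_device *adev);
	/* ... further hardware callbacks elided ... */
};

struct amdgpu_ras_block_object {
	char name[32];                 /* set to "sdma" below; size illustrative */
	enum amdgpu_ras_block block;   /* AMDGPU_RAS_BLOCK__SDMA for this IP */
	const struct amdgpu_ras_block_hw_ops *hw_ops;
	int  (*ras_late_init)(struct amdgpu_device *adev, void *ras_ih_info);
	void (*ras_fini)(struct amdgpu_device *adev);
	/* plus a list node that amdgpu_ras_register_ras_block() links in */
};

/* Each IP block then merely wraps the common object: */
struct amdgpu_sdma_ras {
	struct amdgpu_ras_block_object ras_block;
};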
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -967,7 +967,6 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
 	struct amdgpu_ras_block_object* block_obj = NULL;
 	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);
 	struct ras_err_data err_data = {0, 0, 0, NULL};
-	int i;
 
 	if (!obj)
 		return -EINVAL;
@@ -979,12 +978,6 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
 		amdgpu_ras_get_ecc_info(adev, &err_data);
 		break;
 	case AMDGPU_RAS_BLOCK__SDMA:
-		if (adev->sdma.funcs->query_ras_error_count) {
-			for (i = 0; i < adev->sdma.num_instances; i++)
-				adev->sdma.funcs->query_ras_error_count(adev, i,
-									&err_data);
-		}
-		break;
 	case AMDGPU_RAS_BLOCK__GFX:
 	case AMDGPU_RAS_BLOCK__MMHUB:
 		if (!block_obj || !block_obj->hw_ops) {
@@ -1090,9 +1083,6 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
 		block_obj->hw_ops->reset_ras_error_status(adev);
 		break;
 	case AMDGPU_RAS_BLOCK__SDMA:
-		if (adev->sdma.funcs->reset_ras_error_count)
-			adev->sdma.funcs->reset_ras_error_count(adev);
-		break;
 	case AMDGPU_RAS_BLOCK__HDP:
 		if (!block_obj || !block_obj->hw_ops) {
 			dev_info(adev->dev, "%s doesn't config ras function \n", ras_block_str(block));
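
With its private branches deleted, the SDMA case now falls through to the same hw_ops dispatch as GFX and MMHUB. Conceptually the unified path looks like the following (a simplified sketch, not the verbatim amdgpu_ras.c after this commit):

	/* Simplified sketch of the unified query path after this commit */
	case AMDGPU_RAS_BLOCK__SDMA:
	case AMDGPU_RAS_BLOCK__GFX:
	case AMDGPU_RAS_BLOCK__MMHUB:
		if (!block_obj || !block_obj->hw_ops) {
			dev_info(adev->dev, "%s doesn't config ras function \n",
				 ras_block_str(block));
			return -EINVAL;
		}
		if (block_obj->hw_ops->query_ras_error_count)
			block_obj->hw_ops->query_ras_error_count(adev, &err_data);
		break;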
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
@@ -23,6 +23,7 @@
 #ifndef __AMDGPU_SDMA_H__
 #define __AMDGPU_SDMA_H__
+#include "amdgpu_ras.h"
 
 /* max number of IP instances */
 #define AMDGPU_MAX_SDMA_INSTANCES		8
@@ -50,13 +51,8 @@ struct amdgpu_sdma_instance {
 	bool			burst_nop;
 };
 
-struct amdgpu_sdma_ras_funcs {
-	int (*ras_late_init)(struct amdgpu_device *adev,
-			void *ras_ih_info);
-	void (*ras_fini)(struct amdgpu_device *adev);
-	int (*query_ras_error_count)(struct amdgpu_device *adev,
-			uint32_t instance, void *ras_error_status);
-	void (*reset_ras_error_count)(struct amdgpu_device *adev);
+struct amdgpu_sdma_ras {
+	struct amdgpu_ras_block_object ras_block;
 };
 
 struct amdgpu_sdma {
@@ -73,7 +69,7 @@ struct amdgpu_sdma {
 	uint32_t		srbm_soft_reset;
 	bool			has_page_queue;
 	struct ras_common_if	*ras_if;
-	const struct amdgpu_sdma_ras_funcs	*funcs;
+	struct amdgpu_sdma_ras	*ras;
 };
 
 /*
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1892,13 +1892,13 @@ static int sdma_v4_0_late_init(void *handle)
 	sdma_v4_0_setup_ulv(adev);
 
 	if (!amdgpu_persistent_edc_harvesting_supported(adev)) {
-		if (adev->sdma.funcs &&
-		    adev->sdma.funcs->reset_ras_error_count)
-			adev->sdma.funcs->reset_ras_error_count(adev);
+		if (adev->sdma.ras && adev->sdma.ras->ras_block.hw_ops &&
+		    adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count)
+			adev->sdma.ras->ras_block.hw_ops->reset_ras_error_count(adev);
 	}
 
-	if (adev->sdma.funcs && adev->sdma.funcs->ras_late_init)
-		return adev->sdma.funcs->ras_late_init(adev, &ih_info);
+	if (adev->sdma.ras && adev->sdma.ras->ras_block.ras_late_init)
+		return adev->sdma.ras->ras_block.ras_late_init(adev, &ih_info);
 	else
 		return 0;
 }
@@ -2001,8 +2001,9 @@ static int sdma_v4_0_sw_fini(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	int i;
 
-	if (adev->sdma.funcs && adev->sdma.funcs->ras_fini)
-		adev->sdma.funcs->ras_fini(adev);
+	if (adev->sdma.ras && adev->sdma.ras->ras_block.hw_ops &&
+	    adev->sdma.ras->ras_block.ras_fini)
+		adev->sdma.ras->ras_block.ras_fini(adev);
 
 	for (i = 0; i < adev->sdma.num_instances; i++) {
 		amdgpu_ring_fini(&adev->sdma.instance[i].ring);
@@ -2740,7 +2741,7 @@ static void sdma_v4_0_get_ras_error_count(uint32_t value,
 	}
 }
 
-static int sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev,
+static int sdma_v4_0_query_ras_error_count_by_instance(struct amdgpu_device *adev,
 			uint32_t instance, void *ras_error_status)
 {
 	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
@@ -2762,6 +2763,18 @@ static int sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev,
 	return 0;
 };
 
+static void sdma_v4_0_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status)
+{
+	int i = 0;
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		if (sdma_v4_0_query_ras_error_count_by_instance(adev, i, ras_error_status))
+		{
+			dev_err(adev->dev, "Query ras error count failed in SDMA%d \n", i);
+			return;
+		}
+	}
+}
+
 static void sdma_v4_0_reset_ras_error_count(struct amdgpu_device *adev)
 {
 	int i;
@@ -2773,26 +2786,45 @@ static void sdma_v4_0_reset_ras_error_count(struct amdgpu_device *adev)
 	}
 }
 
-static const struct amdgpu_sdma_ras_funcs sdma_v4_0_ras_funcs = {
-	.ras_late_init = amdgpu_sdma_ras_late_init,
-	.ras_fini = amdgpu_sdma_ras_fini,
+const struct amdgpu_ras_block_hw_ops sdma_v4_0_ras_hw_ops = {
 	.query_ras_error_count = sdma_v4_0_query_ras_error_count,
 	.reset_ras_error_count = sdma_v4_0_reset_ras_error_count,
 };
 
+static struct amdgpu_sdma_ras sdma_v4_0_ras = {
+	.ras_block = {
+		.hw_ops = &sdma_v4_0_ras_hw_ops,
+	},
+};
+
 static void sdma_v4_0_set_ras_funcs(struct amdgpu_device *adev)
 {
 	switch (adev->ip_versions[SDMA0_HWIP][0]) {
 	case IP_VERSION(4, 2, 0):
 	case IP_VERSION(4, 2, 2):
-		adev->sdma.funcs = &sdma_v4_0_ras_funcs;
+		adev->sdma.ras = &sdma_v4_0_ras;
 		break;
 	case IP_VERSION(4, 4, 0):
-		adev->sdma.funcs = &sdma_v4_4_ras_funcs;
+		adev->sdma.ras = &sdma_v4_4_ras;
 		break;
 	default:
 		break;
 	}
+
+	if (adev->sdma.ras) {
+		amdgpu_ras_register_ras_block(adev, &adev->sdma.ras->ras_block);
+
+		strcpy(adev->sdma.ras->ras_block.name, "sdma");
+		adev->sdma.ras->ras_block.block = AMDGPU_RAS_BLOCK__SDMA;
+
+		/* If don't define special ras_late_init function, use default ras_late_init */
+		if (!adev->sdma.ras->ras_block.ras_late_init)
+			adev->sdma.ras->ras_block.ras_late_init = amdgpu_sdma_ras_late_init;
+
+		/* If don't define special ras_fini function, use default ras_fini */
+		if (!adev->sdma.ras->ras_block.ras_fini)
+			adev->sdma.ras->ras_block.ras_fini = amdgpu_sdma_ras_fini;
+	}
 }
 
 const struct amdgpu_ip_block_version sdma_v4_0_ip_block = {
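
Point 4 of the commit message is the amdgpu_ras_register_ras_block() call in sdma_v4_0_set_ras_funcs() above. Its body is not part of this diff; a plausible minimal shape, assuming the device keeps a simple linked list of registered blocks (the member names here are guesses, not verified API):

/* Hypothetical sketch; the real helper lives in amdgpu_ras.c and the
 * 'node' / 'ras_list' member names are assumptions.
 */
int amdgpu_ras_register_ras_block(struct amdgpu_device *adev,
				  struct amdgpu_ras_block_object *ras_block_obj)
{
	if (!adev || !ras_block_obj)
		return -EINVAL;

	/* Append so amdgpu_ras.c can later look the block up by its id */
	list_add_tail(&ras_block_obj->node, &adev->ras_list);
	return 0;
}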
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.c
@@ -188,7 +188,7 @@ static void sdma_v4_4_get_ras_error_count(struct amdgpu_device *adev,
 	}
 }
 
-static int sdma_v4_4_query_ras_error_count(struct amdgpu_device *adev,
+static int sdma_v4_4_query_ras_error_count_by_instance(struct amdgpu_device *adev,
 					   uint32_t instance,
 					   void *ras_error_status)
 {
@@ -245,9 +245,26 @@ static void sdma_v4_4_reset_ras_error_count(struct amdgpu_device *adev)
 	}
 }
 
-const struct amdgpu_sdma_ras_funcs sdma_v4_4_ras_funcs = {
-	.ras_late_init = amdgpu_sdma_ras_late_init,
-	.ras_fini = amdgpu_sdma_ras_fini,
+static void sdma_v4_4_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status)
+{
+	int i = 0;
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		if (sdma_v4_4_query_ras_error_count_by_instance(adev, i, ras_error_status))
+		{
+			dev_err(adev->dev, "Query ras error count failed in SDMA%d \n", i);
+			return;
+		}
+	}
+}
+
+const struct amdgpu_ras_block_hw_ops sdma_v4_4_ras_hw_ops = {
 	.query_ras_error_count = sdma_v4_4_query_ras_error_count,
 	.reset_ras_error_count = sdma_v4_4_reset_ras_error_count,
 };
+
+struct amdgpu_sdma_ras sdma_v4_4_ras = {
+	.ras_block = {
+		.hw_ops = &sdma_v4_4_ras_hw_ops,
+	},
+};
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.h
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4.h
@@ -23,6 +23,6 @@
 #ifndef __SDMA_V4_4_H__
 #define __SDMA_V4_4_H__
 
-extern const struct amdgpu_sdma_ras_funcs sdma_v4_4_ras_funcs;
+extern struct amdgpu_sdma_ras sdma_v4_4_ras;
 
 #endif