提交 52137ca8 编写于 作者: H Hawking Zhang 提交者: Alex Deucher

drm/amdgpu: move xgmi ras functions to xgmi_ras_funcs

XGMI RAS is not managed by the GPU driver when the GPU is
connected to the CPU through XGMI. Move all XGMI RAS
functions to xgmi_ras_funcs so that the GPU driver only
initializes the XGMI RAS functions when it manages
XGMI RAS.
Signed-off-by: Hawking Zhang <Hawking.Zhang@amd.com>
Reviewed-by: Dennis Li <Dennis.Li@amd.com>
Reviewed-by: John Clements <John.Clements@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
上级 6e36f231
...@@ -403,14 +403,26 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev) ...@@ -403,14 +403,26 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
return r; return r;
} }
return amdgpu_xgmi_ras_late_init(adev); if (!adev->gmc.xgmi.connected_to_cpu)
adev->gmc.xgmi.ras_funcs = &xgmi_ras_funcs;
if (adev->gmc.xgmi.ras_funcs &&
adev->gmc.xgmi.ras_funcs->ras_late_init) {
r = adev->gmc.xgmi.ras_funcs->ras_late_init(adev);
if (r)
return r;
}
return 0;
} }
void amdgpu_gmc_ras_fini(struct amdgpu_device *adev) void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
{ {
amdgpu_umc_ras_fini(adev); amdgpu_umc_ras_fini(adev);
amdgpu_mmhub_ras_fini(adev); amdgpu_mmhub_ras_fini(adev);
amdgpu_xgmi_ras_fini(adev); if (adev->gmc.xgmi.ras_funcs &&
adev->gmc.xgmi.ras_funcs->ras_fini)
adev->gmc.xgmi.ras_funcs->ras_fini(adev);
} }
/* /*
......
...@@ -135,6 +135,14 @@ struct amdgpu_gmc_funcs { ...@@ -135,6 +135,14 @@ struct amdgpu_gmc_funcs {
unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev); unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev);
}; };
/*
 * XGMI RAS callback table, reached through amdgpu_xgmi.ras_funcs.
 *
 * The generic callers visible in this change check both the table pointer
 * and the individual hook for NULL before invoking ras_late_init, ras_fini
 * and query_ras_error_count, so a device without a table simply skips
 * XGMI RAS handling.
 */
struct amdgpu_xgmi_ras_funcs {
/*
 * Late-init hook. A non-zero return value is propagated as an error by
 * amdgpu_gmc_ras_late_init().
 */
int (*ras_late_init)(struct amdgpu_device *adev);
/* Teardown hook, invoked from amdgpu_gmc_ras_fini(). */
void (*ras_fini)(struct amdgpu_device *adev);
/*
 * Error-count query hook. ras_error_status is passed as an opaque pointer;
 * the XGMI implementation casts it to struct ras_err_data * and adds the
 * counts it found to ue_count and ce_count.
 */
int (*query_ras_error_count)(struct amdgpu_device *adev,
void *ras_error_status);
/*
 * Error-count reset hook. The XGMI late-init and query implementations
 * call it through the table without a NULL check, so a table used with
 * those implementations must provide it.
 */
void (*reset_ras_error_count)(struct amdgpu_device *adev);
};
struct amdgpu_xgmi { struct amdgpu_xgmi {
/* from psp */ /* from psp */
u64 node_id; u64 node_id;
...@@ -151,6 +159,7 @@ struct amdgpu_xgmi { ...@@ -151,6 +159,7 @@ struct amdgpu_xgmi {
struct ras_common_if *ras_if; struct ras_common_if *ras_if;
bool connected_to_cpu; bool connected_to_cpu;
bool pending_reset; bool pending_reset;
const struct amdgpu_xgmi_ras_funcs *ras_funcs;
}; };
struct amdgpu_gmc { struct amdgpu_gmc {
......
...@@ -809,7 +809,9 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, ...@@ -809,7 +809,9 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
adev->nbio.ras_funcs->query_ras_error_count(adev, &err_data); adev->nbio.ras_funcs->query_ras_error_count(adev, &err_data);
break; break;
case AMDGPU_RAS_BLOCK__XGMI_WAFL: case AMDGPU_RAS_BLOCK__XGMI_WAFL:
amdgpu_xgmi_query_ras_error_count(adev, &err_data); if (adev->gmc.xgmi.ras_funcs &&
adev->gmc.xgmi.ras_funcs->query_ras_error_count)
adev->gmc.xgmi.ras_funcs->query_ras_error_count(adev, &err_data);
break; break;
default: default:
break; break;
......
...@@ -628,7 +628,7 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev) ...@@ -628,7 +628,7 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
return psp_xgmi_terminate(&adev->psp); return psp_xgmi_terminate(&adev->psp);
} }
int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev) static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev)
{ {
int r; int r;
struct ras_ih_if ih_info = { struct ras_ih_if ih_info = {
...@@ -642,7 +642,7 @@ int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev) ...@@ -642,7 +642,7 @@ int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev)
adev->gmc.xgmi.num_physical_nodes == 0) adev->gmc.xgmi.num_physical_nodes == 0)
return 0; return 0;
amdgpu_xgmi_reset_ras_error_count(adev); adev->gmc.xgmi.ras_funcs->reset_ras_error_count(adev);
if (!adev->gmc.xgmi.ras_if) { if (!adev->gmc.xgmi.ras_if) {
adev->gmc.xgmi.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); adev->gmc.xgmi.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
...@@ -664,7 +664,7 @@ int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev) ...@@ -664,7 +664,7 @@ int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev)
return r; return r;
} }
void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev) static void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev)
{ {
if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL) && if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL) &&
adev->gmc.xgmi.ras_if) { adev->gmc.xgmi.ras_if) {
...@@ -691,7 +691,7 @@ static void pcs_clear_status(struct amdgpu_device *adev, uint32_t pcs_status_reg ...@@ -691,7 +691,7 @@ static void pcs_clear_status(struct amdgpu_device *adev, uint32_t pcs_status_reg
WREG32_PCIE(pcs_status_reg, 0); WREG32_PCIE(pcs_status_reg, 0);
} }
void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev) static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev)
{ {
uint32_t i; uint32_t i;
...@@ -751,7 +751,7 @@ static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev, ...@@ -751,7 +751,7 @@ static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev,
return 0; return 0;
} }
int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, static int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
void *ras_error_status) void *ras_error_status)
{ {
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
...@@ -801,10 +801,17 @@ int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, ...@@ -801,10 +801,17 @@ int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
break; break;
} }
amdgpu_xgmi_reset_ras_error_count(adev); adev->gmc.xgmi.ras_funcs->reset_ras_error_count(adev);
err_data->ue_count += ue_cnt; err_data->ue_count += ue_cnt;
err_data->ce_count += ce_cnt; err_data->ce_count += ce_cnt;
return 0; return 0;
} }
/*
 * XGMI RAS callback table backed by the static amdgpu_xgmi_* helpers.
 * amdgpu_gmc_ras_late_init() installs it in adev->gmc.xgmi.ras_funcs only
 * when adev->gmc.xgmi.connected_to_cpu is false.
 */
const struct amdgpu_xgmi_ras_funcs xgmi_ras_funcs = {
.ras_late_init = amdgpu_xgmi_ras_late_init,
.ras_fini = amdgpu_xgmi_ras_fini,
.query_ras_error_count = amdgpu_xgmi_query_ras_error_count,
.reset_ras_error_count = amdgpu_xgmi_reset_ras_error_count,
};
...@@ -50,6 +50,7 @@ struct amdgpu_pcs_ras_field { ...@@ -50,6 +50,7 @@ struct amdgpu_pcs_ras_field {
uint32_t pcs_err_shift; uint32_t pcs_err_shift;
}; };
extern const struct amdgpu_xgmi_ras_funcs xgmi_ras_funcs;
struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev); struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev);
void amdgpu_put_xgmi_hive(struct amdgpu_hive_info *hive); void amdgpu_put_xgmi_hive(struct amdgpu_hive_info *hive);
int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev); int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev);
...@@ -58,14 +59,8 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev); ...@@ -58,14 +59,8 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate); int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
struct amdgpu_device *peer_adev); struct amdgpu_device *peer_adev);
int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev);
void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev);
uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev, uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev,
uint64_t addr); uint64_t addr);
int amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev,
void *ras_error_status);
void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev);
static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev, static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
struct amdgpu_device *bo_adev) struct amdgpu_device *bo_adev)
{ {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册