提交 d510eccf 编写于 作者: S Stanley.Yang 提交者: Alex Deucher

drm/amd/pm: add send bad channel info function

Support messaging the SMU to update the HBM bad channel info
in the OOB table.
Signed-off-by: Stanley.Yang <Stanley.Yang@amd.com>
Reviewed-by: Tao Zhou <tao.zhou1@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
上级 6e6faf7a
......@@ -507,6 +507,18 @@ int amdgpu_dpm_send_hbm_bad_pages_num(struct amdgpu_device *adev, uint32_t size)
return ret;
}
/*
 * Forward an HBM bad channel update request to the SMU layer.
 *
 * @adev: device whose powerplay handle is used as the SMU context
 * @size: value handed unchanged to smu_send_hbm_bad_channel_flag()
 *        (NOTE(review): presumably the bad channel flag payload despite
 *        the name - confirm against the caller)
 *
 * The call into the SMU layer is made with adev->pm.mutex held.
 * Returns whatever smu_send_hbm_bad_channel_flag() returns.
 */
int amdgpu_dpm_send_hbm_bad_channel_flag(struct amdgpu_device *adev, uint32_t size)
{
	struct smu_context *smu_ctx = adev->powerplay.pp_handle;
	int status;

	mutex_lock(&adev->pm.mutex);
	status = smu_send_hbm_bad_channel_flag(smu_ctx, size);
	mutex_unlock(&adev->pm.mutex);

	return status;
}
int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev,
enum pp_clock_type type,
uint32_t *min,
......
......@@ -412,6 +412,7 @@ void amdgpu_dpm_enable_jpeg(struct amdgpu_device *adev, bool enable);
int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_version);
int amdgpu_dpm_handle_passthrough_sbr(struct amdgpu_device *adev, bool enable);
int amdgpu_dpm_send_hbm_bad_pages_num(struct amdgpu_device *adev, uint32_t size);
/* Ask the SMU layer to update HBM bad channel info; takes adev->pm.mutex internally. */
int amdgpu_dpm_send_hbm_bad_channel_flag(struct amdgpu_device *adev, uint32_t size);
int amdgpu_dpm_get_dpm_freq_range(struct amdgpu_device *adev,
enum pp_clock_type type,
uint32_t *min,
......
......@@ -3052,3 +3052,13 @@ int smu_send_hbm_bad_pages_num(struct smu_context *smu, uint32_t size)
return ret;
}
/*
 * Dispatch the bad channel update to the ASIC specific implementation.
 *
 * @smu:  SMU context whose ppt_funcs table is consulted
 * @size: value passed through unchanged to the ASIC callback
 *
 * Returns 0 when no ppt_funcs table or no send_hbm_bad_channel_flag
 * callback is installed, otherwise the callback's return value.
 */
int smu_send_hbm_bad_channel_flag(struct smu_context *smu, uint32_t size)
{
	/* nothing to do on ASICs that do not provide the hook */
	if (!smu->ppt_funcs || !smu->ppt_funcs->send_hbm_bad_channel_flag)
		return 0;

	return smu->ppt_funcs->send_hbm_bad_channel_flag(smu, size);
}
......@@ -1292,6 +1292,12 @@ struct pptable_funcs {
* @set_config_table: Apply the input DriverSmuConfig table settings.
*/
int (*set_config_table)(struct smu_context *smu, struct config_table_setting *table);
/**
* @send_hbm_bad_channel_flag: message SMU to update bad channel info
* of SMUBUS table.
*/
int (*send_hbm_bad_channel_flag)(struct smu_context *smu, uint32_t size);
};
typedef enum {
......@@ -1428,5 +1434,6 @@ int smu_get_ecc_info(struct smu_context *smu, void *umc_ecc);
int smu_stb_collect_info(struct smu_context *smu, void *buff, uint32_t size);
void amdgpu_smu_stb_debug_fs_init(struct amdgpu_device *adev);
int smu_send_hbm_bad_pages_num(struct smu_context *smu, uint32_t size);
/* Calls the ASIC's send_hbm_bad_channel_flag hook if one is installed; returns 0 otherwise. */
int smu_send_hbm_bad_channel_flag(struct smu_context *smu, uint32_t size);
#endif
#endif
......@@ -103,7 +103,8 @@
#define PPSMC_MSG_GfxDriverResetRecovery 0x42
#define PPSMC_MSG_BoardPowerCalibration 0x43
#define PPSMC_MSG_HeavySBR 0x45
#define PPSMC_Message_Count 0x46
#define PPSMC_MSG_SetBadHBMPagesRetiredFlagsPerChannel 0x46
#define PPSMC_Message_Count 0x47
//PPSMC Reset Types
......
......@@ -232,7 +232,8 @@
__SMU_DUMMY_MAP(ForceGfxVid), \
__SMU_DUMMY_MAP(Spare0), \
__SMU_DUMMY_MAP(UnforceGfxVid), \
__SMU_DUMMY_MAP(HeavySBR),
__SMU_DUMMY_MAP(HeavySBR), \
__SMU_DUMMY_MAP(SetBadHBMPagesRetiredFlagsPerChannel),
#undef __SMU_DUMMY_MAP
#define __SMU_DUMMY_MAP(type) SMU_MSG_##type
......
......@@ -82,6 +82,12 @@
*/
#define SUPPORT_ECCTABLE_SMU_VERSION 0x00442a00
/*
* SMU supports the bad channel info message since version 68.51.00,
* use this to check whether that message is supported
*/
#define SUPPORT_BAD_CHANNEL_INFO_MSG_VERSION 0x00443300
static const struct smu_temperature_range smu13_thermal_policy[] =
{
{-273150, 99000, 99000, -273150, 99000, 99000, -273150, 99000, 99000},
......@@ -140,6 +146,7 @@ static const struct cmn2asic_msg_mapping aldebaran_message_map[SMU_MSG_MAX_COUNT
MSG_MAP(GfxDriverResetRecovery, PPSMC_MSG_GfxDriverResetRecovery, 0),
MSG_MAP(BoardPowerCalibration, PPSMC_MSG_BoardPowerCalibration, 0),
MSG_MAP(HeavySBR, PPSMC_MSG_HeavySBR, 0),
MSG_MAP(SetBadHBMPagesRetiredFlagsPerChannel, PPSMC_MSG_SetBadHBMPagesRetiredFlagsPerChannel, 0),
};
static const struct cmn2asic_mapping aldebaran_clk_map[SMU_CLK_COUNT] = {
......@@ -1997,6 +2004,41 @@ static int aldebaran_smu_send_hbm_bad_page_num(struct smu_context *smu,
return ret;
}
/*
 * Check whether the SMU firmware accepts the bad channel info message.
 *
 * Returns 0 when the SMU version reported by smu_cmn_get_smc_version()
 * is at least SUPPORT_BAD_CHANNEL_INFO_MSG_VERSION, and -EOPNOTSUPP when
 * it is older or when the version query itself fails.
 */
static int aldebaran_check_bad_channel_info_support(struct smu_context *smu)
{
	uint32_t if_ver = 0xff;
	uint32_t fw_ver = 0xff;

	/* a failed version query is reported as "not supported" */
	if (smu_cmn_get_smc_version(smu, &if_ver, &fw_ver))
		return -EOPNOTSUPP;

	return (fw_ver < SUPPORT_BAD_CHANNEL_INFO_MSG_VERSION) ? -EOPNOTSUPP : 0;
}
/*
 * Send the SetBadHBMPagesRetiredFlagsPerChannel message to the SMU.
 *
 * @smu:  SMU context
 * @size: message parameter, passed to the SMU unchanged
 *
 * Returns the error from aldebaran_check_bad_channel_info_support() if
 * the firmware check fails (no message is sent in that case), otherwise
 * the result of smu_cmn_send_smc_msg_with_param(). A failed send is
 * logged with dev_err().
 */
static int aldebaran_send_hbm_bad_channel_flag(struct smu_context *smu,
					       uint32_t size)
{
	int err;

	err = aldebaran_check_bad_channel_info_support(smu);
	if (err)
		return err;

	/* message SMU to update the bad channel info on SMUBUS */
	err = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetBadHBMPagesRetiredFlagsPerChannel, size, NULL);
	if (!err)
		return 0;

	dev_err(smu->adev->dev, "[%s] failed to message SMU to update HBM bad channel info\n",
		__func__);
	return err;
}
static const struct pptable_funcs aldebaran_ppt_funcs = {
/* init dpm */
.get_allowed_feature_mask = aldebaran_get_allowed_feature_mask,
......@@ -2062,6 +2104,7 @@ static const struct pptable_funcs aldebaran_ppt_funcs = {
.i2c_fini = aldebaran_i2c_control_fini,
.send_hbm_bad_pages_num = aldebaran_smu_send_hbm_bad_page_num,
.get_ecc_info = aldebaran_get_ecc_info,
.send_hbm_bad_channel_flag = aldebaran_send_hbm_bad_channel_flag,
};
void aldebaran_set_ppt_funcs(struct smu_context *smu)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册