提交 400013b2 编写于 作者: T Tao Zhou 提交者: Alex Deucher

drm/amdgpu: add umc_fill_error_record to make code more simple

Create common amdgpu_umc_fill_error_record function for all versions
of UMC and clean up related codes.
Signed-off-by: NTao Zhou <tao.zhou1@amd.com>
Reviewed-by: NHawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: NAlex Deucher <alexander.deucher@amd.com>
上级 9a176960
......@@ -158,14 +158,9 @@ static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t addre
}
memset(&err_rec, 0x0, sizeof(struct eeprom_table_record));
err_rec.address = address;
err_rec.retired_page = address >> AMDGPU_GPU_PAGE_SHIFT;
err_rec.ts = (uint64_t)ktime_get_real_seconds();
err_rec.err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
err_data.err_addr = &err_rec;
err_data.err_addr_cnt = 1;
amdgpu_umc_fill_error_record(&err_data, address,
(address >> AMDGPU_GPU_PAGE_SHIFT), 0, 0);
if (amdgpu_bad_page_threshold != 0) {
amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
......@@ -2660,8 +2655,6 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb,
dev_info(adev->dev, "Uncorrectable error detected in UMC inst: %d, chan_idx: %d",
umc_inst, ch_inst);
memset(&err_rec, 0x0, sizeof(struct eeprom_table_record));
/*
* Translate UMC channel address to Physical address
*/
......@@ -2673,16 +2666,10 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb,
ADDR_OF_256B_BLOCK(channel_index) |
OFFSET_IN_256B_BLOCK(m->addr);
err_rec.address = m->addr;
err_rec.retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
err_rec.ts = (uint64_t)ktime_get_real_seconds();
err_rec.err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
err_rec.cu = 0;
err_rec.mem_channel = channel_index;
err_rec.mcumc_id = umc_inst;
memset(&err_rec, 0x0, sizeof(struct eeprom_table_record));
err_data.err_addr = &err_rec;
err_data.err_addr_cnt = 1;
amdgpu_umc_fill_error_record(&err_data, m->addr,
retired_page, channel_index, umc_inst);
if (amdgpu_bad_page_threshold != 0) {
amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
......
......@@ -218,3 +218,24 @@ int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
amdgpu_ras_interrupt_dispatch(adev, &ih_data);
return 0;
}
void amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
uint64_t err_addr,
uint64_t retired_page,
uint32_t channel_index,
uint32_t umc_inst)
{
struct eeprom_table_record *err_rec =
&err_data->err_addr[err_data->err_addr_cnt];
err_rec->address = err_addr;
/* page frame address is saved */
err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
err_rec->ts = (uint64_t)ktime_get_real_seconds();
err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
err_rec->cu = 0;
err_rec->mem_channel = channel_index;
err_rec->mcumc_id = umc_inst;
err_data->err_addr_cnt++;
}
......@@ -80,4 +80,9 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry);
void amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
uint64_t err_addr,
uint64_t retired_page,
uint32_t channel_index,
uint32_t umc_inst);
#endif
......@@ -300,7 +300,6 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
{
uint32_t lsb, mc_umc_status_addr;
uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
struct eeprom_table_record *err_rec;
uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
if (adev->asic_type == CHIP_ARCTURUS) {
......@@ -328,8 +327,6 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
return;
}
err_rec = &err_data->err_addr[err_data->err_addr_cnt];
/* calculate error address if ue/ce error is detected */
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
......@@ -348,18 +345,9 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
/* we only save ue error information currently, ce is skipped */
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
== 1) {
err_rec->address = err_addr;
/* page frame address is saved */
err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
err_rec->ts = (uint64_t)ktime_get_real_seconds();
err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
err_rec->cu = 0;
err_rec->mem_channel = channel_index;
err_rec->mcumc_id = umc_inst;
err_data->err_addr_cnt++;
}
== 1)
amdgpu_umc_fill_error_record(err_data, err_addr,
retired_page, channel_index, umc_inst);
}
/* clear umc status */
......
......@@ -120,7 +120,6 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
uint32_t umc_inst)
{
uint64_t mc_umc_status, err_addr, retired_page;
struct eeprom_table_record *err_rec;
uint32_t channel_index;
uint32_t eccinfo_table_idx;
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
......@@ -137,8 +136,6 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
if (!err_data->err_addr)
return;
err_rec = &err_data->err_addr[err_data->err_addr_cnt];
/* calculate error address if ue/ce error is detected */
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
......@@ -154,18 +151,9 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
/* we only save ue error information currently, ce is skipped */
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
== 1) {
err_rec->address = err_addr;
/* page frame address is saved */
err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
err_rec->ts = (uint64_t)ktime_get_real_seconds();
err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
err_rec->cu = 0;
err_rec->mem_channel = channel_index;
err_rec->mcumc_id = umc_inst;
err_data->err_addr_cnt++;
}
== 1)
amdgpu_umc_fill_error_record(err_data, err_addr,
retired_page, channel_index, umc_inst);
}
}
......@@ -345,7 +333,6 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev,
{
uint32_t mc_umc_status_addr;
uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
struct eeprom_table_record *err_rec;
uint32_t channel_index;
mc_umc_status_addr =
......@@ -364,8 +351,6 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev,
return;
}
err_rec = &err_data->err_addr[err_data->err_addr_cnt];
channel_index =
adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
......@@ -384,18 +369,9 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev,
/* we only save ue error information currently, ce is skipped */
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
== 1) {
err_rec->address = err_addr;
/* page frame address is saved */
err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
err_rec->ts = (uint64_t)ktime_get_real_seconds();
err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
err_rec->cu = 0;
err_rec->mem_channel = channel_index;
err_rec->mcumc_id = umc_inst;
err_data->err_addr_cnt++;
}
== 1)
amdgpu_umc_fill_error_record(err_data, err_addr,
retired_page, channel_index, umc_inst);
}
/* clear umc status */
......
......@@ -121,7 +121,6 @@ static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev,
uint32_t umc_inst)
{
uint64_t mc_umc_status, err_addr, retired_page;
struct eeprom_table_record *err_rec;
uint32_t channel_index;
uint32_t eccinfo_table_idx;
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
......@@ -138,8 +137,6 @@ static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev,
if (!err_data->err_addr)
return;
err_rec = &err_data->err_addr[err_data->err_addr_cnt];
/* calculate error address if ue/ce error is detected */
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
......@@ -155,18 +152,9 @@ static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev,
/* we only save ue error information currently, ce is skipped */
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
== 1) {
err_rec->address = err_addr;
/* page frame address is saved */
err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
err_rec->ts = (uint64_t)ktime_get_real_seconds();
err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
err_rec->cu = 0;
err_rec->mem_channel = channel_index;
err_rec->mcumc_id = umc_inst;
err_data->err_addr_cnt++;
}
== 1)
amdgpu_umc_fill_error_record(err_data, err_addr,
retired_page, channel_index, umc_inst);
}
}
......@@ -344,7 +332,6 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev,
{
uint32_t lsb, mc_umc_status_addr;
uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
struct eeprom_table_record *err_rec;
uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
mc_umc_status_addr =
......@@ -363,8 +350,6 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev,
return;
}
err_rec = &err_data->err_addr[err_data->err_addr_cnt];
/* calculate error address if ue/ce error is detected */
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
......@@ -383,18 +368,9 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev,
/* we only save ue error information currently, ce is skipped */
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
== 1) {
err_rec->address = err_addr;
/* page frame address is saved */
err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
err_rec->ts = (uint64_t)ktime_get_real_seconds();
err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
err_rec->cu = 0;
err_rec->mem_channel = channel_index;
err_rec->mcumc_id = umc_inst;
err_data->err_addr_cnt++;
}
== 1)
amdgpu_umc_fill_error_record(err_data, err_addr,
retired_page, channel_index, umc_inst);
}
/* clear umc status */
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册