提交 400013b2 编写于 作者: T Tao Zhou 提交者: Alex Deucher

drm/amdgpu: add umc_fill_error_record to make code more simple

Create a common amdgpu_umc_fill_error_record function for all versions
of UMC and clean up the related code.
Signed-off-by: Tao Zhou <tao.zhou1@amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
上级 9a176960
...@@ -158,14 +158,9 @@ static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t addre ...@@ -158,14 +158,9 @@ static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t addre
} }
memset(&err_rec, 0x0, sizeof(struct eeprom_table_record)); memset(&err_rec, 0x0, sizeof(struct eeprom_table_record));
err_rec.address = address;
err_rec.retired_page = address >> AMDGPU_GPU_PAGE_SHIFT;
err_rec.ts = (uint64_t)ktime_get_real_seconds();
err_rec.err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
err_data.err_addr = &err_rec; err_data.err_addr = &err_rec;
err_data.err_addr_cnt = 1; amdgpu_umc_fill_error_record(&err_data, address,
(address >> AMDGPU_GPU_PAGE_SHIFT), 0, 0);
if (amdgpu_bad_page_threshold != 0) { if (amdgpu_bad_page_threshold != 0) {
amdgpu_ras_add_bad_pages(adev, err_data.err_addr, amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
...@@ -2660,8 +2655,6 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb, ...@@ -2660,8 +2655,6 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb,
dev_info(adev->dev, "Uncorrectable error detected in UMC inst: %d, chan_idx: %d", dev_info(adev->dev, "Uncorrectable error detected in UMC inst: %d, chan_idx: %d",
umc_inst, ch_inst); umc_inst, ch_inst);
memset(&err_rec, 0x0, sizeof(struct eeprom_table_record));
/* /*
* Translate UMC channel address to Physical address * Translate UMC channel address to Physical address
*/ */
...@@ -2673,16 +2666,10 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb, ...@@ -2673,16 +2666,10 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb,
ADDR_OF_256B_BLOCK(channel_index) | ADDR_OF_256B_BLOCK(channel_index) |
OFFSET_IN_256B_BLOCK(m->addr); OFFSET_IN_256B_BLOCK(m->addr);
err_rec.address = m->addr; memset(&err_rec, 0x0, sizeof(struct eeprom_table_record));
err_rec.retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
err_rec.ts = (uint64_t)ktime_get_real_seconds();
err_rec.err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
err_rec.cu = 0;
err_rec.mem_channel = channel_index;
err_rec.mcumc_id = umc_inst;
err_data.err_addr = &err_rec; err_data.err_addr = &err_rec;
err_data.err_addr_cnt = 1; amdgpu_umc_fill_error_record(&err_data, m->addr,
retired_page, channel_index, umc_inst);
if (amdgpu_bad_page_threshold != 0) { if (amdgpu_bad_page_threshold != 0) {
amdgpu_ras_add_bad_pages(adev, err_data.err_addr, amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
......
...@@ -218,3 +218,24 @@ int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, ...@@ -218,3 +218,24 @@ int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
amdgpu_ras_interrupt_dispatch(adev, &ih_data); amdgpu_ras_interrupt_dispatch(adev, &ih_data);
return 0; return 0;
} }
/**
 * amdgpu_umc_fill_error_record - append one error record to a RAS error list
 * @err_data: error data whose err_addr array receives the new record
 * @err_addr: error address, stored in the record unchanged
 * @retired_page: address of the page to retire; it is shifted right by
 *                AMDGPU_GPU_PAGE_SHIFT here, so callers must pass the
 *                unshifted value
 * @channel_index: stored as the record's mem_channel
 * @umc_inst: stored as the record's mcumc_id
 *
 * The record is written at index err_data->err_addr_cnt and the count is
 * incremented afterwards.  Every record is marked
 * AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE with cu set to 0.
 *
 * No NULL or capacity check is performed here: the caller has to provide a
 * valid err_addr array that still has a free entry.
 */
void amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
		uint64_t err_addr,
		uint64_t retired_page,
		uint32_t channel_index,
		uint32_t umc_inst)
{
	/* next unused entry of the caller-provided record array */
	struct eeprom_table_record *slot =
		err_data->err_addr + err_data->err_addr_cnt;

	/* where the error happened */
	slot->mcumc_id = umc_inst;
	slot->mem_channel = channel_index;
	slot->cu = 0;

	/* what kind of error it was and when it was recorded */
	slot->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
	slot->ts = (uint64_t)ktime_get_real_seconds();

	/* raw error address, plus the page frame number of the retired page */
	slot->address = err_addr;
	slot->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;

	/* publish the entry by bumping the record count */
	err_data->err_addr_cnt += 1;
}
...@@ -80,4 +80,9 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev, ...@@ -80,4 +80,9 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source, struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry); struct amdgpu_iv_entry *entry);
/*
 * Fill the entry at err_data->err_addr[err_data->err_addr_cnt] with an
 * AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE record and increment err_addr_cnt.
 *
 * err_addr is stored unchanged; retired_page is shifted right by
 * AMDGPU_GPU_PAGE_SHIFT inside the function, so pass the unshifted address.
 * channel_index and umc_inst are stored as mem_channel and mcumc_id.
 * The function does not validate err_data or err_data->err_addr.
 */
void amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
uint64_t err_addr,
uint64_t retired_page,
uint32_t channel_index,
uint32_t umc_inst);
#endif #endif
...@@ -300,7 +300,6 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, ...@@ -300,7 +300,6 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
{ {
uint32_t lsb, mc_umc_status_addr; uint32_t lsb, mc_umc_status_addr;
uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0; uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
struct eeprom_table_record *err_rec;
uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
if (adev->asic_type == CHIP_ARCTURUS) { if (adev->asic_type == CHIP_ARCTURUS) {
...@@ -328,8 +327,6 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, ...@@ -328,8 +327,6 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
return; return;
} }
err_rec = &err_data->err_addr[err_data->err_addr_cnt];
/* calculate error address if ue/ce error is detected */ /* calculate error address if ue/ce error is detected */
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
...@@ -348,18 +345,9 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, ...@@ -348,18 +345,9 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev,
/* we only save ue error information currently, ce is skipped */ /* we only save ue error information currently, ce is skipped */
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
== 1) { == 1)
err_rec->address = err_addr; amdgpu_umc_fill_error_record(err_data, err_addr,
/* page frame address is saved */ retired_page, channel_index, umc_inst);
err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
err_rec->ts = (uint64_t)ktime_get_real_seconds();
err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
err_rec->cu = 0;
err_rec->mem_channel = channel_index;
err_rec->mcumc_id = umc_inst;
err_data->err_addr_cnt++;
}
} }
/* clear umc status */ /* clear umc status */
......
...@@ -120,7 +120,6 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev, ...@@ -120,7 +120,6 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
uint32_t umc_inst) uint32_t umc_inst)
{ {
uint64_t mc_umc_status, err_addr, retired_page; uint64_t mc_umc_status, err_addr, retired_page;
struct eeprom_table_record *err_rec;
uint32_t channel_index; uint32_t channel_index;
uint32_t eccinfo_table_idx; uint32_t eccinfo_table_idx;
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
...@@ -137,8 +136,6 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev, ...@@ -137,8 +136,6 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
if (!err_data->err_addr) if (!err_data->err_addr)
return; return;
err_rec = &err_data->err_addr[err_data->err_addr_cnt];
/* calculate error address if ue/ce error is detected */ /* calculate error address if ue/ce error is detected */
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
...@@ -154,18 +151,9 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev, ...@@ -154,18 +151,9 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev,
/* we only save ue error information currently, ce is skipped */ /* we only save ue error information currently, ce is skipped */
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
== 1) { == 1)
err_rec->address = err_addr; amdgpu_umc_fill_error_record(err_data, err_addr,
/* page frame address is saved */ retired_page, channel_index, umc_inst);
err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
err_rec->ts = (uint64_t)ktime_get_real_seconds();
err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
err_rec->cu = 0;
err_rec->mem_channel = channel_index;
err_rec->mcumc_id = umc_inst;
err_data->err_addr_cnt++;
}
} }
} }
...@@ -345,7 +333,6 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev, ...@@ -345,7 +333,6 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev,
{ {
uint32_t mc_umc_status_addr; uint32_t mc_umc_status_addr;
uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0; uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
struct eeprom_table_record *err_rec;
uint32_t channel_index; uint32_t channel_index;
mc_umc_status_addr = mc_umc_status_addr =
...@@ -364,8 +351,6 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev, ...@@ -364,8 +351,6 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev,
return; return;
} }
err_rec = &err_data->err_addr[err_data->err_addr_cnt];
channel_index = channel_index =
adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
...@@ -384,18 +369,9 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev, ...@@ -384,18 +369,9 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev,
/* we only save ue error information currently, ce is skipped */ /* we only save ue error information currently, ce is skipped */
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
== 1) { == 1)
err_rec->address = err_addr; amdgpu_umc_fill_error_record(err_data, err_addr,
/* page frame address is saved */ retired_page, channel_index, umc_inst);
err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
err_rec->ts = (uint64_t)ktime_get_real_seconds();
err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
err_rec->cu = 0;
err_rec->mem_channel = channel_index;
err_rec->mcumc_id = umc_inst;
err_data->err_addr_cnt++;
}
} }
/* clear umc status */ /* clear umc status */
......
...@@ -121,7 +121,6 @@ static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev, ...@@ -121,7 +121,6 @@ static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev,
uint32_t umc_inst) uint32_t umc_inst)
{ {
uint64_t mc_umc_status, err_addr, retired_page; uint64_t mc_umc_status, err_addr, retired_page;
struct eeprom_table_record *err_rec;
uint32_t channel_index; uint32_t channel_index;
uint32_t eccinfo_table_idx; uint32_t eccinfo_table_idx;
struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
...@@ -138,8 +137,6 @@ static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev, ...@@ -138,8 +137,6 @@ static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev,
if (!err_data->err_addr) if (!err_data->err_addr)
return; return;
err_rec = &err_data->err_addr[err_data->err_addr_cnt];
/* calculate error address if ue/ce error is detected */ /* calculate error address if ue/ce error is detected */
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
...@@ -155,18 +152,9 @@ static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev, ...@@ -155,18 +152,9 @@ static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev,
/* we only save ue error information currently, ce is skipped */ /* we only save ue error information currently, ce is skipped */
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
== 1) { == 1)
err_rec->address = err_addr; amdgpu_umc_fill_error_record(err_data, err_addr,
/* page frame address is saved */ retired_page, channel_index, umc_inst);
err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
err_rec->ts = (uint64_t)ktime_get_real_seconds();
err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
err_rec->cu = 0;
err_rec->mem_channel = channel_index;
err_rec->mcumc_id = umc_inst;
err_data->err_addr_cnt++;
}
} }
} }
...@@ -344,7 +332,6 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev, ...@@ -344,7 +332,6 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev,
{ {
uint32_t lsb, mc_umc_status_addr; uint32_t lsb, mc_umc_status_addr;
uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0; uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0;
struct eeprom_table_record *err_rec;
uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst];
mc_umc_status_addr = mc_umc_status_addr =
...@@ -363,8 +350,6 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev, ...@@ -363,8 +350,6 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev,
return; return;
} }
err_rec = &err_data->err_addr[err_data->err_addr_cnt];
/* calculate error address if ue/ce error is detected */ /* calculate error address if ue/ce error is detected */
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
...@@ -383,18 +368,9 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev, ...@@ -383,18 +368,9 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev,
/* we only save ue error information currently, ce is skipped */ /* we only save ue error information currently, ce is skipped */
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC)
== 1) { == 1)
err_rec->address = err_addr; amdgpu_umc_fill_error_record(err_data, err_addr,
/* page frame address is saved */ retired_page, channel_index, umc_inst);
err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
err_rec->ts = (uint64_t)ktime_get_real_seconds();
err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
err_rec->cu = 0;
err_rec->mem_channel = channel_index;
err_rec->mcumc_id = umc_inst;
err_data->err_addr_cnt++;
}
} }
/* clear umc status */ /* clear umc status */
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册