diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c index 7d5b96dc390a03d805ac6335e7ae081b89c54e19..f157f4c7036d0b5760c6c1d99f335c380fc78b0c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c @@ -1663,10 +1663,10 @@ pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev) return PCI_ERS_RESULT_RECOVERED; } -void hclge_handle_hw_msix_error(struct hclge_dev *hdev) +int hclge_handle_hw_msix_error(struct hclge_dev *hdev, + unsigned long *reset_requests) { struct device *dev = &hdev->pdev->dev; - struct hnae3_ae_dev *ae_dev = hdev->ae_dev; u32 mpf_bd_num, pf_bd_num, bd_num; enum hnae3_reset_type reset_level; struct hclge_desc desc_bd; @@ -1682,7 +1682,7 @@ void hclge_handle_hw_msix_error(struct hclge_dev *hdev) if (ret) { dev_err(dev, "fail(%d) to query msix int status bd num\n", ret); - return; + return ret; } mpf_bd_num = le32_to_cpu(desc_bd.data[0]); @@ -1691,7 +1691,7 @@ void hclge_handle_hw_msix_error(struct hclge_dev *hdev) desc = kcalloc(bd_num, sizeof(struct hclge_desc), GFP_KERNEL); if (!desc) - return; + goto out; /* query all main PF MSIx errors */ hclge_cmd_setup_basic_desc(&desc[0], HCLGE_QUERY_CLEAR_ALL_MPF_MSIX_INT, @@ -1711,10 +1711,10 @@ void hclge_handle_hw_msix_error(struct hclge_dev *hdev) reset_level = hclge_log_error(dev, "MAC_AFIFO_TNL_INT_R", &hclge_mac_afifo_tnl_int[0], status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); + set_bit(reset_level, reset_requests); } - /* log PPU(RCB) errors */ + /* log PPU(RCB) MPF errors */ desc_data = (__le32 *)&desc[5]; status = le32_to_cpu(*(desc_data + 2)) & HCLGE_PPU_MPF_INT_ST2_MSIX_MASK; @@ -1723,7 +1723,7 @@ void hclge_handle_hw_msix_error(struct hclge_dev *hdev) hclge_log_error(dev, "PPU_MPF_ABNORMAL_INT_ST2", &hclge_ppu_mpf_abnormal_int_st2[0], status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); + set_bit(reset_level, 
reset_requests); } /* clear all main PF MSIx errors */ @@ -1754,7 +1754,7 @@ void hclge_handle_hw_msix_error(struct hclge_dev *hdev) reset_level = hclge_log_error(dev, "SSU_PORT_BASED_ERR_INT", &hclge_ssu_port_based_pf_int[0], status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); + set_bit(reset_level, reset_requests); } /* read and log PPP PF errors */ @@ -1764,18 +1764,17 @@ void hclge_handle_hw_msix_error(struct hclge_dev *hdev) reset_level = hclge_log_error(dev, "PPP_PF_ABNORMAL_INT_ST0", &hclge_ppp_pf_abnormal_int[0], status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); + set_bit(reset_level, reset_requests); } - - /* PPU(RCB) PF errors */ + /* log PPU(RCB) PF errors */ desc_data = (__le32 *)&desc[3]; status = le32_to_cpu(*desc_data) & HCLGE_PPU_PF_INT_MSIX_MASK; if (status) { reset_level = hclge_log_error(dev, "PPU_PF_ABNORMAL_INT_ST", &hclge_ppu_pf_abnormal_int[0], status); - HCLGE_SET_DEFAULT_RESET_REQUEST(reset_level); + set_bit(reset_level, reset_requests); } /* clear all PF MSIx errors */ @@ -1788,5 +1787,6 @@ void hclge_handle_hw_msix_error(struct hclge_dev *hdev) msi_error: kfree(desc); +out: + return ret; } - diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h index 0166730a4a69b22798e6f83ac1fbc8d14a30b318..e9f2c37c90740cdba652f0d975011358f4a79a6b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h @@ -156,7 +156,8 @@ extern const struct hclge_hw_error hclge_rocee_qmm_ovf_err_int[]; int hclge_hw_error_set_state(struct hclge_dev *hdev, bool state); int hclge_clear_all_ras_errors(struct hclge_dev *hdev); pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev); -void hclge_handle_hw_msix_error(struct hclge_dev *hdev); +int hclge_handle_hw_msix_error(struct hclge_dev *hdev, + unsigned long *reset_requests); int hclge_handle_all_ras_errors(struct hclge_dev *hdev); int 
hclge_handle_rocee_ras_error(struct hnae3_ae_dev *ae_dev); enum hnae3_reset_type hclge_log_error(struct device *dev, char *reg, diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index afb40ff772c4e18af491b99917a1081d920b8d2a..b09ef42dac26618074bd594aa2d1115232a83664 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2402,6 +2402,16 @@ static irqreturn_t hclge_misc_irq_handle(int irq, void *data) /* vector 0 interrupt is shared with reset and mailbox source events.*/ switch (event_cause) { case HCLGE_VECTOR0_EVENT_ERR: + /* we do not know what type of reset is required now. This could + * only be decided after we fetch the type of errors which + * caused this event. Therefore, we will do below for now: + * 1. Assert HNAE3_UNKNOWN_RESET type of reset. This means we + * have deferred type of reset to be used. + * 2. Schedule the reset service task. + * 3. When service task receives HNAE3_UNKNOWN_RESET type it + * will fetch the correct type of reset. This would be done + * by first decoding the types of errors. 
+ */ set_bit(HNAE3_UNKNOWN_RESET, &hdev->reset_request); /* fall through */ case HCLGE_VECTOR0_EVENT_RST: @@ -2484,23 +2494,6 @@ static void hclge_misc_irq_uninit(struct hclge_dev *hdev) hclge_free_vector(hdev, 0); } -static void hclge_misc_err_recovery(struct hclge_dev *hdev) -{ - struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); - u32 msix_sts_reg; - - msix_sts_reg = hclge_read_dev(&hdev->hw, - HCLGE_VECTOR0_PF_OTHER_INT_STS_REG); - - if (msix_sts_reg & HCLGE_VECTOR0_REG_MSIX_MASK) { - hclge_handle_hw_msix_error(hdev); - if (ae_dev->ops->reset_event) - ae_dev->ops->reset_event(hdev->pdev, NULL); - } - clear_bit(HNAE3_UNKNOWN_RESET, &hdev->reset_request); - hclge_enable_vector(&hdev->misc_vector, true); -} - int hclge_notify_client(struct hclge_dev *hdev, enum hnae3_reset_notify_type type) { @@ -2726,6 +2719,23 @@ enum hnae3_reset_type hclge_get_reset_level(struct hclge_dev *hdev, { enum hnae3_reset_type rst_level = HNAE3_NONE_RESET; + /* first, resolve any unknown reset type to the known type(s) */ + if (test_bit(HNAE3_UNKNOWN_RESET, addr)) { + /* we will intentionally ignore any errors from this function + * as we will end up in *some* reset request in any case + */ + hclge_handle_hw_msix_error(hdev, addr); + clear_bit(HNAE3_UNKNOWN_RESET, addr); + /* We deferred the clearing of the error event which caused + * interrupt since it was not possible to do that in + * interrupt context (and this is the reason we introduced + * new UNKNOWN reset type). Now, the errors have been + * handled and cleared in hardware we can safely enable + * interrupts. This is an exception to the norm. 
+ */ + hclge_enable_vector(&hdev->misc_vector, true); + } + /* return the highest priority reset level amongst all */ if (test_bit(HNAE3_IMP_RESET, addr)) { rst_level = HNAE3_IMP_RESET; @@ -3114,16 +3124,11 @@ static void hclge_reset_service_task(struct work_struct *work) struct hclge_dev *hdev = container_of(work, struct hclge_dev, rst_service_task); - clear_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state); - - if (test_bit(HNAE3_UNKNOWN_RESET, &hdev->reset_request)) { - hclge_misc_err_recovery(hdev); - return; - } - if (test_and_set_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) return; + clear_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state); + hclge_reset_subtask(hdev); clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state);