diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c index 784dd27c36132b98bec27da0c8952cf490af9f90..9599d7cc9ace374046a0019058f769e7f7e9bfe2 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c @@ -1355,49 +1355,6 @@ int hclge_handle_all_ras_errors(struct hclge_dev *hdev) return ret; } -int hclge_clear_all_ras_errors(struct hclge_dev *hdev) -{ - struct hclge_bd_num bd_num; - struct hclge_desc *desc; - int ret; - - /* query the number of registers in the RAS int status */ - desc = hclge_query_bd_num(hdev, &bd_num, - HCLGE_QUERY_RAS_INT_STS_BD_NUM); - if (!desc) - return -ENOMEM; - - /* query all main PF RAS errors */ - ret = hclge_query_error(hdev, desc, HCLGE_QUERY_CLEAR_MPF_RAS_INT, - bd_num.mpf_bd_num); - if (ret) { - kfree(desc); - return ret; - } - - /* clear all main PF RAS errors */ - ret = hclge_clear_error(hdev, desc, bd_num.mpf_bd_num); - if (ret) { - kfree(desc); - return ret; - } - - memset(desc, 0, bd_num.max_bd_num * sizeof(struct hclge_desc)); - /* query all PF RAS errors */ - ret = hclge_query_error(hdev, desc, HCLGE_QUERY_CLEAR_PF_RAS_INT, - bd_num.pf_bd_num); - if (ret) { - kfree(desc); - return ret; - } - - /* clear all PF RAS errors */ - ret = hclge_clear_error(hdev, desc, bd_num.pf_bd_num); - - kfree(desc); - return ret; -} - static int hclge_log_rocee_ovf_error(struct hclge_dev *hdev) { struct device *dev = &hdev->pdev->dev; @@ -1501,7 +1458,7 @@ hclge_log_and_clear_rocee_ras_error(struct hclge_dev *hdev) return reset_type; } -static int hclge_config_rocee_ras_interrupt(struct hclge_dev *hdev, bool en) +int hclge_config_rocee_ras_interrupt(struct hclge_dev *hdev, bool en) { struct device *dev = &hdev->pdev->dev; struct hclge_desc desc; @@ -1576,10 +1533,9 @@ static const struct hclge_hw_blk hw_blk[] = { { /* sentinel */ } }; -int hclge_hw_error_set_state(struct hclge_dev *hdev, bool state) +int hclge_config_nic_hw_error(struct hclge_dev *hdev, bool state) { const struct hclge_hw_blk *module = hw_blk; - struct device *dev = &hdev->pdev->dev; int ret = 0; while (module->name) { @@ -1591,10 +1547,6 @@ int hclge_hw_error_set_state(struct hclge_dev *hdev, bool state) module++; } - ret = hclge_config_rocee_ras_interrupt(hdev, state); - if (ret) - dev_err(dev, "fail(%d) to configure ROCEE err int\n", ret); - return ret; } @@ -1604,6 +1556,12 @@ pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev) struct device *dev = &hdev->pdev->dev; u32 status; + if (!test_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state)) { + dev_err(dev, + "Can't recover - RAS error reported during dev init\n"); + return PCI_ERS_RESULT_NONE; + } + status = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG); if (status & HCLGE_RAS_REG_NFE_MASK || @@ -1640,8 +1598,8 @@ pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev) return PCI_ERS_RESULT_RECOVERED; } -int hclge_handle_hw_msix_error(struct hclge_dev *hdev, - unsigned long *reset_requests) +static int hclge_handle_all_hw_msix_error(struct hclge_dev *hdev, + unsigned long *reset_requests) { struct hclge_mac_tnl_stats mac_tnl_stats; struct device *dev = &hdev->pdev->dev; @@ -1781,3 +1739,41 @@ int hclge_handle_hw_msix_error(struct hclge_dev *hdev, out: return ret; } + +int hclge_handle_hw_msix_error(struct hclge_dev *hdev, + unsigned long *reset_requests) +{ + struct device *dev = &hdev->pdev->dev; + + if (!test_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state)) { + dev_err(dev, + "Can't handle - MSIx error reported during dev init\n"); + return 0; + } + + return hclge_handle_all_hw_msix_error(hdev, reset_requests); +} + +void hclge_handle_all_hns_hw_errors(struct hnae3_ae_dev *ae_dev) +{ + struct hclge_dev *hdev = ae_dev->priv; + struct device *dev = &hdev->pdev->dev; + u32 status; + + ae_dev->hw_err_reset_req = 0; + status = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG); + + /* Handle Non-fatal HNS RAS errors */ + if (status & HCLGE_RAS_REG_NFE_MASK) { + dev_warn(dev, "HNS hw error(RAS) identified during init\n"); + hclge_handle_all_ras_errors(hdev); + } + + /* Handle HNS hw errors reported through msix */ + status = hclge_read_dev(&hdev->hw, + HCLGE_VECTOR0_PF_OTHER_INT_STS_REG); + if (status & HCLGE_VECTOR0_REG_MSIX_MASK) { + dev_warn(dev, "HNS hw error(MSIx) identified during init\n"); + hclge_handle_all_hw_msix_error(hdev, &ae_dev->hw_err_reset_req); + } +} diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h index 534a622adab30370c52db73f0797ae7d699f9e95..f08a63ace84282705bea59d082f1edc01c707022 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h @@ -150,8 +150,9 @@ extern const struct hclge_hw_error hclge_ssu_port_based_pf_int[]; extern const struct hclge_hw_error hclge_rocee_qmm_ovf_err_int[]; int hclge_config_mac_tnl_int(struct hclge_dev *hdev, bool en); -int hclge_hw_error_set_state(struct hclge_dev *hdev, bool state); -int hclge_clear_all_ras_errors(struct hclge_dev *hdev); +int hclge_config_nic_hw_error(struct hclge_dev *hdev, bool state); +int hclge_config_rocee_ras_interrupt(struct hclge_dev *hdev, bool en); +void hclge_handle_all_hns_hw_errors(struct hnae3_ae_dev *ae_dev); pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev); int hclge_handle_hw_msix_error(struct hclge_dev *hdev, unsigned long *reset_requests); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index d728332432ac923368227e4202c1bf489944c6e5..7014c6383614e74f15591e187145b6cc0119b76d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -8307,10 +8307,16 @@ static int hclge_init_nic_client_instance(struct hnae3_ae_dev *ae_dev, hnae3_set_client_init_flag(client, ae_dev, 1); + /* Enable nic hw error interrupts */ + ret = hclge_config_nic_hw_error(hdev, true); + if (ret) + dev_err(&ae_dev->pdev->dev, + "fail(%d) to enable hw error interrupts\n", ret); + if (netif_msg_drv(&hdev->vport->nic)) hclge_info_show(hdev); - return 0; + return ret; } static int hclge_init_roce_client_instance(struct hnae3_ae_dev *ae_dev, @@ -8334,7 +8340,13 @@ static int hclge_init_roce_client_instance(struct hnae3_ae_dev *ae_dev, hnae3_set_client_init_flag(client, ae_dev, 1); - return 0; + /* Enable roce ras interrupts */ + ret = hclge_config_rocee_ras_interrupt(hdev, true); + if (ret) + dev_err(&ae_dev->pdev->dev, + "fail(%d) to enable roce ras interrupts\n", ret); + + return ret; } static int hclge_init_client_instance(struct hnae3_client *client, @@ -8686,20 +8698,6 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) goto err_mdiobus_unreg; } - ret = hclge_hw_error_set_state(hdev, true); - if (ret) { - dev_err(&pdev->dev, - "fail(%d) to enable hw error interrupts\n", ret); - goto err_mdiobus_unreg; - } - - ret = hclge_clear_all_ras_errors(hdev); - if (ret) { - dev_err(&pdev->dev, - "fail(%d) to clear all ras states\n", ret); - goto err_mdiobus_unreg; - } - INIT_KFIFO(hdev->mac_tnl_log); hclge_dcb_ops_set(hdev); @@ -8712,6 +8710,9 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) hclge_clear_all_event_cause(hdev); + /* Log and clear the hw errors those already occurred */ + hclge_handle_all_hns_hw_errors(ae_dev); + /* Enable MISC vector(vector0) */ hclge_enable_vector(&hdev->misc_vector, true); @@ -8826,20 +8827,24 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev) } /* Re-enable the hw error interrupts because - * the interrupts get disabled on core/global reset. + * the interrupts get disabled on global reset. */ - ret = hclge_hw_error_set_state(hdev, true); + ret = hclge_config_nic_hw_error(hdev, true); if (ret) { dev_err(&pdev->dev, - "fail(%d) to re-enable HNS hw error interrupts\n", ret); + "fail(%d) to re-enable NIC hw error interrupts\n", + ret); return ret; } - ret = hclge_clear_all_ras_errors(hdev); - if (ret) { - dev_err(&pdev->dev, - "fail(%d) to clear all ras states\n", ret); - return ret; + if (hdev->roce_client) { + ret = hclge_config_rocee_ras_interrupt(hdev, true); + if (ret) { + dev_err(&ae_dev->pdev->dev, + "fail(%d) to re-enable roce ras interrupts\n", + ret); + return ret; + } } hclge_reset_vport_state(hdev); @@ -8866,8 +8871,11 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev) hclge_enable_vector(&hdev->misc_vector, false); synchronize_irq(hdev->misc_vector.vector_irq); + /* Disable all hw interrupts */ hclge_config_mac_tnl_int(hdev, false); - hclge_hw_error_set_state(hdev, false); + hclge_config_nic_hw_error(hdev, false); + hclge_config_rocee_ras_interrupt(hdev, false); + hclge_cmd_uninit(hdev); hclge_misc_irq_uninit(hdev); hclge_pci_uninit(hdev);