From 6afa66aefe46f9a56c339f165f067e612eaf79ed Mon Sep 17 00:00:00 2001 From: Shiju Jose Date: Thu, 25 Apr 2019 16:15:55 +0800 Subject: [PATCH] net: hns3: process H/W errors occurred before HNS dev initialization driver inclusion category: bugfix bugzilla: NA CVE: NA Presently the HNS driver enables the HNS H/W error interrupts after the dev initialization is completed. However, some exceptions such as NCSI errors can occur when the network port driver is not loaded, and those errors need to be reported to the BMC. Therefore, the firmware enables all the HNS hw error interrupts before the driver is loaded. Thus, the HNS driver needs to process and recover from those hw errors that occurred before the HNS driver is initialized. This patch adds processing of the HNS hw errors (RAS and MSI-X) which occurred before the driver initialization. Feature or Bugfix:Feature Signed-off-by: Shiju Jose Signed-off-by: Weihang Li Signed-off-by: Shiju Jose Signed-off-by: liweihang Reviewed-by: shenjian Reviewed-by: Xie XiuQi Signed-off-by: Yang Yingliang --- .../hisilicon/hns3/hns3pf/hclge_err.c | 100 +++++++++--------- .../hisilicon/hns3/hns3pf/hclge_err.h | 5 +- .../hisilicon/hns3/hns3pf/hclge_main.c | 58 +++++----- 3 files changed, 84 insertions(+), 79 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c index 784dd27c3613..9599d7cc9ace 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c @@ -1355,49 +1355,6 @@ int hclge_handle_all_ras_errors(struct hclge_dev *hdev) return ret; } -int hclge_clear_all_ras_errors(struct hclge_dev *hdev) -{ - struct hclge_bd_num bd_num; - struct hclge_desc *desc; - int ret; - - /* query the number of registers in the RAS int status */ - desc = hclge_query_bd_num(hdev, &bd_num, - HCLGE_QUERY_RAS_INT_STS_BD_NUM); - if (!desc) - return -ENOMEM; - - /* query all main PF RAS errors */ - ret = hclge_query_error(hdev, 
desc, HCLGE_QUERY_CLEAR_MPF_RAS_INT, - bd_num.mpf_bd_num); - if (ret) { - kfree(desc); - return ret; - } - - /* clear all main PF RAS errors */ - ret = hclge_clear_error(hdev, desc, bd_num.mpf_bd_num); - if (ret) { - kfree(desc); - return ret; - } - - memset(desc, 0, bd_num.max_bd_num * sizeof(struct hclge_desc)); - /* query all PF RAS errors */ - ret = hclge_query_error(hdev, desc, HCLGE_QUERY_CLEAR_PF_RAS_INT, - bd_num.pf_bd_num); - if (ret) { - kfree(desc); - return ret; - } - - /* clear all PF RAS errors */ - ret = hclge_clear_error(hdev, desc, bd_num.pf_bd_num); - - kfree(desc); - return ret; -} - static int hclge_log_rocee_ovf_error(struct hclge_dev *hdev) { struct device *dev = &hdev->pdev->dev; @@ -1501,7 +1458,7 @@ hclge_log_and_clear_rocee_ras_error(struct hclge_dev *hdev) return reset_type; } -static int hclge_config_rocee_ras_interrupt(struct hclge_dev *hdev, bool en) +int hclge_config_rocee_ras_interrupt(struct hclge_dev *hdev, bool en) { struct device *dev = &hdev->pdev->dev; struct hclge_desc desc; @@ -1576,10 +1533,9 @@ static const struct hclge_hw_blk hw_blk[] = { { /* sentinel */ } }; -int hclge_hw_error_set_state(struct hclge_dev *hdev, bool state) +int hclge_config_nic_hw_error(struct hclge_dev *hdev, bool state) { const struct hclge_hw_blk *module = hw_blk; - struct device *dev = &hdev->pdev->dev; int ret = 0; while (module->name) { @@ -1591,10 +1547,6 @@ int hclge_hw_error_set_state(struct hclge_dev *hdev, bool state) module++; } - ret = hclge_config_rocee_ras_interrupt(hdev, state); - if (ret) - dev_err(dev, "fail(%d) to configure ROCEE err int\n", ret); - return ret; } @@ -1604,6 +1556,12 @@ pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev) struct device *dev = &hdev->pdev->dev; u32 status; + if (!test_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state)) { + dev_err(dev, + "Can't recover - RAS error reported during dev init\n"); + return PCI_ERS_RESULT_NONE; + } + status = hclge_read_dev(&hdev->hw, 
HCLGE_RAS_PF_OTHER_INT_STS_REG); if (status & HCLGE_RAS_REG_NFE_MASK || @@ -1640,8 +1598,8 @@ pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev) return PCI_ERS_RESULT_RECOVERED; } -int hclge_handle_hw_msix_error(struct hclge_dev *hdev, - unsigned long *reset_requests) +static int hclge_handle_all_hw_msix_error(struct hclge_dev *hdev, + unsigned long *reset_requests) { struct hclge_mac_tnl_stats mac_tnl_stats; struct device *dev = &hdev->pdev->dev; @@ -1781,3 +1739,41 @@ int hclge_handle_hw_msix_error(struct hclge_dev *hdev, out: return ret; } + +int hclge_handle_hw_msix_error(struct hclge_dev *hdev, + unsigned long *reset_requests) +{ + struct device *dev = &hdev->pdev->dev; + + if (!test_bit(HCLGE_STATE_SERVICE_INITED, &hdev->state)) { + dev_err(dev, + "Can't handle - MSIx error reported during dev init\n"); + return 0; + } + + return hclge_handle_all_hw_msix_error(hdev, reset_requests); +} + +void hclge_handle_all_hns_hw_errors(struct hnae3_ae_dev *ae_dev) +{ + struct hclge_dev *hdev = ae_dev->priv; + struct device *dev = &hdev->pdev->dev; + u32 status; + + ae_dev->hw_err_reset_req = 0; + status = hclge_read_dev(&hdev->hw, HCLGE_RAS_PF_OTHER_INT_STS_REG); + + /* Handle Non-fatal HNS RAS errors */ + if (status & HCLGE_RAS_REG_NFE_MASK) { + dev_warn(dev, "HNS hw error(RAS) identified during init\n"); + hclge_handle_all_ras_errors(hdev); + } + + /* Handle HNS hw errors reported through msix */ + status = hclge_read_dev(&hdev->hw, + HCLGE_VECTOR0_PF_OTHER_INT_STS_REG); + if (status & HCLGE_VECTOR0_REG_MSIX_MASK) { + dev_warn(dev, "HNS hw error(MSIx) identified during init\n"); + hclge_handle_all_hw_msix_error(hdev, &ae_dev->hw_err_reset_req); + } +} diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h index 534a622adab3..f08a63ace842 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h @@ -150,8 
+150,9 @@ extern const struct hclge_hw_error hclge_ssu_port_based_pf_int[]; extern const struct hclge_hw_error hclge_rocee_qmm_ovf_err_int[]; int hclge_config_mac_tnl_int(struct hclge_dev *hdev, bool en); -int hclge_hw_error_set_state(struct hclge_dev *hdev, bool state); -int hclge_clear_all_ras_errors(struct hclge_dev *hdev); +int hclge_config_nic_hw_error(struct hclge_dev *hdev, bool state); +int hclge_config_rocee_ras_interrupt(struct hclge_dev *hdev, bool en); +void hclge_handle_all_hns_hw_errors(struct hnae3_ae_dev *ae_dev); pci_ers_result_t hclge_handle_hw_ras_error(struct hnae3_ae_dev *ae_dev); int hclge_handle_hw_msix_error(struct hclge_dev *hdev, unsigned long *reset_requests); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index d728332432ac..7014c6383614 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -8307,10 +8307,16 @@ static int hclge_init_nic_client_instance(struct hnae3_ae_dev *ae_dev, hnae3_set_client_init_flag(client, ae_dev, 1); + /* Enable nic hw error interrupts */ + ret = hclge_config_nic_hw_error(hdev, true); + if (ret) + dev_err(&ae_dev->pdev->dev, + "fail(%d) to enable hw error interrupts\n", ret); + if (netif_msg_drv(&hdev->vport->nic)) hclge_info_show(hdev); - return 0; + return ret; } static int hclge_init_roce_client_instance(struct hnae3_ae_dev *ae_dev, @@ -8334,7 +8340,13 @@ static int hclge_init_roce_client_instance(struct hnae3_ae_dev *ae_dev, hnae3_set_client_init_flag(client, ae_dev, 1); - return 0; + /* Enable roce ras interrupts */ + ret = hclge_config_rocee_ras_interrupt(hdev, true); + if (ret) + dev_err(&ae_dev->pdev->dev, + "fail(%d) to enable roce ras interrupts\n", ret); + + return ret; } static int hclge_init_client_instance(struct hnae3_client *client, @@ -8686,20 +8698,6 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) goto err_mdiobus_unreg; } 
- ret = hclge_hw_error_set_state(hdev, true); - if (ret) { - dev_err(&pdev->dev, - "fail(%d) to enable hw error interrupts\n", ret); - goto err_mdiobus_unreg; - } - - ret = hclge_clear_all_ras_errors(hdev); - if (ret) { - dev_err(&pdev->dev, - "fail(%d) to clear all ras states\n", ret); - goto err_mdiobus_unreg; - } - INIT_KFIFO(hdev->mac_tnl_log); hclge_dcb_ops_set(hdev); @@ -8712,6 +8710,9 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev) hclge_clear_all_event_cause(hdev); + /* Log and clear the hw errors those already occurred */ + hclge_handle_all_hns_hw_errors(ae_dev); + /* Enable MISC vector(vector0) */ hclge_enable_vector(&hdev->misc_vector, true); @@ -8826,20 +8827,24 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev) } /* Re-enable the hw error interrupts because - * the interrupts get disabled on core/global reset. + * the interrupts get disabled on global reset. */ - ret = hclge_hw_error_set_state(hdev, true); + ret = hclge_config_nic_hw_error(hdev, true); if (ret) { dev_err(&pdev->dev, - "fail(%d) to re-enable HNS hw error interrupts\n", ret); + "fail(%d) to re-enable NIC hw error interrupts\n", + ret); return ret; } - ret = hclge_clear_all_ras_errors(hdev); - if (ret) { - dev_err(&pdev->dev, - "fail(%d) to clear all ras states\n", ret); - return ret; + if (hdev->roce_client) { + ret = hclge_config_rocee_ras_interrupt(hdev, true); + if (ret) { + dev_err(&ae_dev->pdev->dev, + "fail(%d) to re-enable roce ras interrupts\n", + ret); + return ret; + } } hclge_reset_vport_state(hdev); @@ -8866,8 +8871,11 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev) hclge_enable_vector(&hdev->misc_vector, false); synchronize_irq(hdev->misc_vector.vector_irq); + /* Disable all hw interrupts */ hclge_config_mac_tnl_int(hdev, false); - hclge_hw_error_set_state(hdev, false); + hclge_config_nic_hw_error(hdev, false); + hclge_config_rocee_ras_interrupt(hdev, false); + hclge_cmd_uninit(hdev); hclge_misc_irq_uninit(hdev); 
hclge_pci_uninit(hdev); -- GitLab