From 7e228817bc4ee6f9710177a45275d54ce54f1d3a Mon Sep 17 00:00:00 2001 From: liweihang Date: Wed, 15 May 2019 14:56:51 +0800 Subject: [PATCH] net: hns3: modify handling of msi-x driver inclusion category: bugfix bugzilla: NA CVE: NA Move hclge_handle_hw_msix_error() out of the region protected by the HCLGE_STATE_RST_HANDLING lock; handling it under that lock may cause nic client initialization to fail when some msi-x errors occur after the first client has finished initialization. Feature or Bugfix: Bugfix Signed-off-by: liweihang Reviewed-by: lipeng Reviewed-by: Yang Yingliang Signed-off-by: Yang Yingliang --- .../hisilicon/hns3/hns3pf/hclge_err.c | 1 - .../hisilicon/hns3/hns3pf/hclge_main.c | 45 +++++++++++-------- 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c index 52e5cdef3c3d..e4c3d5666b9f 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c @@ -1796,7 +1796,6 @@ static int hclge_handle_all_hw_msix_error(struct hclge_dev *hdev, ret = hclge_clear_mac_tnl_int(hdev); if (ret) dev_err(dev, "clear mac tnl int failed (%d)\n", ret); - set_bit(HNAE3_NONE_RESET, reset_requests); } msi_error: diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 1cb2c21de2f0..87777ec3b3ca 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -3113,23 +3113,6 @@ enum hnae3_reset_type hclge_get_reset_level(struct hclge_dev *hdev, { enum hnae3_reset_type rst_level = HNAE3_NONE_RESET; - /* first, resolve any unknown reset type to the known type(s) */ - if (test_bit(HNAE3_UNKNOWN_RESET, addr)) { - /* we will intentionally ignore any errors from this function - * as we will end up in *some* reset request in any case - */ - hclge_handle_hw_msix_error(hdev, addr); - 
clear_bit(HNAE3_UNKNOWN_RESET, addr); - /* We defered the clearing of the error event which caused - * interrupt since it was not posssible to do that in - * interrupt context (and this is the reason we introduced - * new UNKNOWN reset type). Now, the errors have been - * handled and cleared in hardware we can safely enable - * interrupts. This is an exception to the norm. - */ - hclge_enable_vector(&hdev->misc_vector, true); - } - /* return the highest priority reset level amongst all */ if (test_bit(HNAE3_IMP_RESET, addr)) { rst_level = HNAE3_IMP_RESET; @@ -3535,15 +3518,39 @@ static void hclge_reset_subtask(struct hclge_dev *hdev) hdev->reset_type = HNAE3_NONE_RESET; } +static void hclge_misc_err_recovery(struct hclge_dev *hdev) +{ + struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev); + u32 msix_sts_reg; + + msix_sts_reg = hclge_read_dev(&hdev->hw, + HCLGE_VECTOR0_PF_OTHER_INT_STS_REG); + + if (msix_sts_reg & HCLGE_VECTOR0_REG_MSIX_MASK) { + hclge_handle_hw_msix_error(hdev, &hdev->default_reset_request); + if (hdev->default_reset_request) + if (ae_dev->ops->reset_event) + ae_dev->ops->reset_event(hdev->pdev, NULL); + } + + clear_bit(HNAE3_UNKNOWN_RESET, &hdev->reset_request); + hclge_enable_vector(&hdev->misc_vector, true); +} + static void hclge_reset_service_task(struct work_struct *work) { struct hclge_dev *hdev = container_of(work, struct hclge_dev, rst_service_task); - if (test_and_set_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) + clear_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state); + + if (test_bit(HNAE3_UNKNOWN_RESET, &hdev->reset_request)) { + hclge_misc_err_recovery(hdev); return; + } - clear_bit(HCLGE_STATE_RST_SERVICE_SCHED, &hdev->state); + if (test_and_set_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) + return; hclge_reset_subtask(hdev); -- GitLab