提交 e284db77 编写于 作者: Y Yonglong Liu 提交者: Yang Yingliang

net: hns3: fix kernel crash when unload VF while it is being reset

driver inclusion
category: bugfix
bugzilla: NA
CVE: NA

----------------------------

When VLANs are fully configured for a VF, unloading the VF while
a reset of the PF is being triggered causes a kernel crash, because
the IRQ has already been uninitialized.

[  293.177579] ------------[ cut here ]------------
[  293.183502] kernel BUG at drivers/pci/msi.c:352!
[  293.189547] Internal error: Oops - BUG: 0 [#1] SMP
[  293.195910] Modules linked in: 8021q garp mrp hclgevf devlink xt_CHECKSUM ipt_MASQUERADE xt_conntrack ipt_REJECT nf_reject_ipv4 ip6table_mangle ip6table_nat nf_nat_ipv6 iptable_mangle iptable_nat nf_nat_ipv4 nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 libcrc32c ebtable_filter ebtables ip6table_filter ip6_tables iptable_filter tun bridge stp llc rfkill vfat fat ib_isert iscsi_target_mod ib_srpt target_core_mod ib_srp scsi_transport_srp ib_ipoib ib_umad rpcrdma sunrpc rdma_ucm ib_uverbs ib_iser rdma_cm iw_cm ib_cm libiscsi scsi_transport_iscsi ipmi_ssif aes_ce_blk crypto_simd cryptd hns_roce_hw_v2 aes_ce_cipher ghash_ce hns_roce sha2_ce sha256_arm64 ses sha1_ce ib_core enclosure sbsa_gwdt uio_pdrv_genirq uio sg ipmi_si ipmi_devintf ipmi_msghandler spi_dw_mmio sch_fq_codel ip_tables ext4 mbcache
[  293.302197]  jbd2 sd_mod realtek hisi_sas_v3_hw hclge hisi_sas_main libsas ahci scsi_transport_sas libahci ixgbe hns3 libata hnae3 host_edma_drv mdio dm_mirror dm_region_hash dm_log dm_mod
[  293.334208] Process kworker/0:4 (pid: 684, stack limit = 0x000000009f218975)
[  293.346841] CPU: 0 PID: 684 Comm: kworker/0:4 Kdump: loaded Not tainted 4.19.90-2106.3.0.0095.oe1.aarch64 #1
[  293.368467] Hardware name: Huawei TaiShan 2280 V2/BC82AMDC, BIOS 2280-V2 CS V5.B170.01 06/30/2021
[  293.390124] Workqueue: hclgevf hclgevf_service_task [hclgevf]
[  293.402627] pstate: 80c00009 (Nzcv daif +PAN +UAO)
[  293.414324] pc : free_msi_irqs+0x19c/0x1b8
[  293.425429] lr : free_msi_irqs+0x18c/0x1b8
[  293.436545] sp : ffff00002716fbb0
[  293.446950] x29: ffff00002716fbb0 x28: 0000000000000000
[  293.459519] x27: 0000000000000000 x26: ffff45b91ea16b00
[  293.472183] x25: 0000000000000000 x24: ffffa587b08f4700
[  293.484717] x23: ffffc591ac30e000 x22: ffffa587b08f8428
[  293.497190] x21: ffffc591ac30e300 x20: 0000000000000000
[  293.509594] x19: ffffa58a062a8300 x18: 0000000000000000
[  293.521949] x17: 0000000000000000 x16: ffff45b91dcc3f48
[  293.534013] x15: 0000000000000000 x14: 0000000000000000
[  293.545883] x13: 0000000000000040 x12: 0000000000000228
[  293.557508] x11: 0000000000000020 x10: 0000000000000040
[  293.568889] x9 : ffff45b91ea1e190 x8 : ffffc591802d0000
[  293.580123] x7 : ffffc591802d0148 x6 : 0000000000000120
[  293.591190] x5 : ffffc591802d0000 x4 : 0000000000000000
[  293.602015] x3 : 0000000000000000 x2 : 0000000000000000
[  293.612624] x1 : 00000000000004a4 x0 : ffffa58a1e0c6b80
[  293.623028] Call trace:
[  293.630340]  free_msi_irqs+0x19c/0x1b8
[  293.638849]  pci_disable_msix+0x118/0x140
[  293.647452]  pci_free_irq_vectors+0x20/0x38
[  293.656081]  hclgevf_uninit_msi+0x44/0x58 [hclgevf]
[  293.665309]  hclgevf_reset_rebuild+0x1ac/0x2e0 [hclgevf]
[  293.674866]  hclgevf_reset+0x358/0x400 [hclgevf]
[  293.683545]  hclgevf_reset_service_task+0xd0/0x1b0 [hclgevf]
[  293.693325]  hclgevf_service_task+0x4c/0x2e8 [hclgevf]
[  293.702307]  process_one_work+0x1b0/0x448
[  293.710034]  worker_thread+0x54/0x468
[  293.717331]  kthread+0x134/0x138
[  293.724114]  ret_from_fork+0x10/0x18
[  293.731324] Code: f940b000 b4ffff00 a903e7b8 f90017b6 (d4210000)

This patch fixes the problem by waiting for the VF reset to
complete while unloading the VF.
Signed-off-by: Yonglong Liu <liuyonglong@huawei.com>
Reviewed-by: Junxin Chen <chenjunxin1@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
上级 b2c2b141
...@@ -2896,6 +2896,8 @@ static void hclgevf_uninit_client_instance(struct hnae3_client *client, ...@@ -2896,6 +2896,8 @@ static void hclgevf_uninit_client_instance(struct hnae3_client *client,
/* un-init roce, if it exists */ /* un-init roce, if it exists */
if (hdev->roce_client) { if (hdev->roce_client) {
while (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state))
msleep(100);
hdev->roce_client->ops->uninit_instance(&hdev->roce, 0); hdev->roce_client->ops->uninit_instance(&hdev->roce, 0);
hdev->roce_client = NULL; hdev->roce_client = NULL;
hdev->roce.client = NULL; hdev->roce.client = NULL;
...@@ -2905,6 +2907,8 @@ static void hclgevf_uninit_client_instance(struct hnae3_client *client, ...@@ -2905,6 +2907,8 @@ static void hclgevf_uninit_client_instance(struct hnae3_client *client,
if (client->ops->uninit_instance && hdev->nic_client && if (client->ops->uninit_instance && hdev->nic_client &&
client->type != HNAE3_CLIENT_ROCE) { client->type != HNAE3_CLIENT_ROCE) {
clear_bit(HCLGEVF_STATE_NIC_REGISTERED, &hdev->state); clear_bit(HCLGEVF_STATE_NIC_REGISTERED, &hdev->state);
while (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state))
msleep(100);
client->ops->uninit_instance(&hdev->nic, 0); client->ops->uninit_instance(&hdev->nic, 0);
hdev->nic_client = NULL; hdev->nic_client = NULL;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册