From 762c10db0d17cc7181767051205a93e8fe438e92 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Thu, 9 May 2019 14:27:22 +0800 Subject: [PATCH] pciehp: fix a race between pciehp and removing operations by sysfs hulk inclusion category: bugfix bugzilla: NA CVE: NA ------------------------------------------------- When I run a stress test about pcie hotplug and removing operations by sysfs, I got a hange task, and the following call trace is printed. INFO: task irq/746-pciehp:41551 blocked for more than 120 seconds. Tainted: P W OE 4.19.25- "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. irq/746-pciehp D 0 41551 2 0x00000228 Call trace: __switch_to+0x94/0xe8 __schedule+0x270/0x8b0 schedule+0x2c/0x88 schedule_preempt_disabled+0x14/0x20 __mutex_lock.isra.1+0x1fc/0x540 __mutex_lock_slowpath+0x24/0x30 mutex_lock+0x80/0xa8 pci_lock_rescan_remove+0x20/0x28 pciehp_configure_device+0x30/0x140 pciehp_handle_presence_or_link_change+0x35c/0x4b0 pciehp_ist+0x1cc/0x1d0 irq_thread_fn+0x30/0x80 irq_thread+0x128/0x200 kthread+0x134/0x138 ret_from_fork+0x10/0x18 INFO: task bash:6424 blocked for more than 120 seconds. Tainted: P W OE 4.19.25- "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. bash D 0 6424 2231 0x00000200 Call trace: __switch_to+0x94/0xe8 __schedule+0x270/0x8b0 schedule+0x2c/0x88 schedule_timeout+0x224/0x448 wait_for_common+0x198/0x2a0 wait_for_completion+0x28/0x38 kthread_stop+0x60/0x190 __free_irq+0x1c0/0x348 free_irq+0x40/0x88 pcie_shutdown_notification+0x54/0x80 pciehp_remove+0x30/0x50 pcie_port_remove_service+0x3c/0x58 device_release_driver_internal+0x1b4/0x250 device_release_driver+0x28/0x38 bus_remove_device+0xd4/0x160 device_del+0x128/0x348 device_unregister+0x24/0x78 remove_iter+0x48/0x58 device_for_each_child+0x6c/0xb8 pcie_port_device_remove+0x2c/0x48 pcie_portdrv_remove+0x5c/0x68 pci_device_remove+0x48/0xd8 device_release_driver_internal+0x1b4/0x250 device_release_driver+0x28/0x38 pci_stop_bus_device+0x84/0xb8 pci_stop_and_remove_bus_device_locked+0x24/0x40 remove_store+0xa4/0xb8 dev_attr_store+0x44/0x60 sysfs_kf_write+0x58/0x80 kernfs_fop_write+0xe8/0x1f0 __vfs_write+0x60/0x190 vfs_write+0xac/0x1c0 ksys_write+0x6c/0xd8 __arm64_sys_write+0x24/0x30 el0_svc_common+0xa0/0x180 el0_svc_handler+0x38/0x78 el0_svc+0x8/0xc When we remove a slot by sysfs. 'pci_stop_and_remove_bus_device_locked()' will be called. This function will get the global mutex lock 'pci_rescan_remove_lock', and remove the slot. If the irq thread 'pciehp_ist' is still running, we will wait until it exits. If a pciehp interrupt happens immediately after we remove the slot by sysfs, but before we free the pciehp irq in 'pci_stop_and_remove_bus_device_locked()'. 'pciehp_ist' will hung because the global mutex lock 'pci_rescan_remove_lock' is held by the sysfs operation. But the sysfs operation is waiting for the pciehp irq thread 'pciehp_ist' ends. Then a hung task occurs. So this two kinds of operation, removing through attention buttion and removing through /sys/devices/pci***/remove, should not be excuted at the same time. This patch add a global variable to mark that one of these operations is under processing. When this variable is set, if another operation is requested, it will be rejected. Signed-off-by: Xiongfeng Wang Reviewed-by: Hanjun Guo Signed-off-by: Yang Yingliang --- drivers/pci/hotplug/pciehp_ctrl.c | 14 ++++++++ drivers/pci/hotplug/pciehp_hpc.c | 56 ++++++++++++++++++++++++++++--- drivers/pci/pci-sysfs.c | 8 +++++ drivers/pci/remove.c | 5 +++ include/linux/pci.h | 3 ++ 5 files changed, 81 insertions(+), 5 deletions(-) diff --git a/drivers/pci/hotplug/pciehp_ctrl.c b/drivers/pci/hotplug/pciehp_ctrl.c index 305f8031eb01..d5f993d2f5f3 100644 --- a/drivers/pci/hotplug/pciehp_ctrl.c +++ b/drivers/pci/hotplug/pciehp_ctrl.c @@ -145,6 +145,8 @@ void pciehp_queue_pushbutton_work(struct work_struct *work) { struct slot *p_slot = container_of(work, struct slot, work.work); struct controller *ctrl = p_slot->ctrl; + int events = atomic_long_read(&work->data) & (PCI_EXP_SLTSTA_PDC | + PCI_EXP_SLTSTA_DLLSC | DISABLE_SLOT); mutex_lock(&p_slot->lock); switch (p_slot->state) { @@ -155,6 +157,12 @@ void pciehp_queue_pushbutton_work(struct work_struct *work) pciehp_request(ctrl, PCI_EXP_SLTSTA_PDC); break; default: + if (events) { + atomic_or(events, &ctrl->pending_events); + if (!pciehp_poll_mode) + irq_wake_thread(ctrl->pcie->irq, ctrl); + } else + slot_being_removed_rescanned = 0; break; } mutex_unlock(&p_slot->lock); @@ -180,6 +188,7 @@ void pciehp_handle_button_press(struct slot *p_slot) /* blink green LED and turn off amber */ pciehp_green_led_blink(p_slot); pciehp_set_attention_status(p_slot, 0); + atomic_long_set(&p_slot->work.work.data, 0); schedule_delayed_work(&p_slot->work, 5 * HZ); break; case BLINKINGOFF_STATE: @@ -201,10 +210,12 @@ void pciehp_handle_button_press(struct slot *p_slot) pciehp_set_attention_status(p_slot, 0); ctrl_info(ctrl, "Slot(%s): Action canceled due to button press\n", slot_name(p_slot)); + slot_being_removed_rescanned = 0; break; default: ctrl_err(ctrl, "Slot(%s): Ignoring invalid state %#x\n", slot_name(p_slot), p_slot->state); + slot_being_removed_rescanned = 0; break; } mutex_unlock(&p_slot->lock); @@ -225,6 +236,7 @@ void pciehp_handle_disable_request(struct slot *slot) mutex_unlock(&slot->lock); ctrl->request_result = pciehp_disable_slot(slot, SAFE_REMOVAL); + slot_being_removed_rescanned = 0; } void pciehp_handle_presence_or_link_change(struct slot *slot, u32 events) @@ -274,6 +286,7 @@ void pciehp_handle_presence_or_link_change(struct slot *slot, u32 events) link_active = pciehp_check_link_active(ctrl); if (!present && !link_active) { mutex_unlock(&slot->lock); + slot_being_removed_rescanned = 0; return; } @@ -296,6 +309,7 @@ void pciehp_handle_presence_or_link_change(struct slot *slot, u32 events) mutex_unlock(&slot->lock); break; } + slot_being_removed_rescanned = 0; } static int __pciehp_enable_slot(struct slot *p_slot) diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index f875f73c1f4a..600961c1e08b 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -659,7 +659,17 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id) if (events & PCI_EXP_SLTSTA_ABP) { ctrl_info(ctrl, "Slot(%s): Attention button pressed\n", slot_name(slot)); - pciehp_handle_button_press(slot); + if (!test_and_set_bit(0, &slot_being_removed_rescanned)) + pciehp_handle_button_press(slot); + else { + if (slot->state == BLINKINGOFF_STATE || + slot->state == BLINKINGON_STATE) + pciehp_handle_button_press(slot); + else + ctrl_info(ctrl, "Slot(%s): Slot operation failed because a remove or" + " rescan operation is under processing, please try later!\n", + slot_name(slot)); + } } /* Check Power Fault Detected */ @@ -675,10 +685,46 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id) * or Data Link Layer State Changed events. */ down_read(&ctrl->reset_lock); - if (events & DISABLE_SLOT) - pciehp_handle_disable_request(slot); - else if (events & (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC)) - pciehp_handle_presence_or_link_change(slot, events); + if (events & DISABLE_SLOT) { + if (!test_and_set_bit(0, &slot_being_removed_rescanned)) + pciehp_handle_disable_request(slot); + else { + if (slot->state == BLINKINGOFF_STATE || + slot->state == BLINKINGON_STATE) + pciehp_handle_disable_request(slot); + else { + /* + * we use the work_struct private data to store + * the event type + */ + atomic_long_set(&slot->work.work.data, + DISABLE_SLOT); + schedule_delayed_work(&slot->work, 3 * HZ); + } + } + } else if (events & (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC)) { + if (!test_and_set_bit(0, &slot_being_removed_rescanned)) + pciehp_handle_presence_or_link_change(slot, events); + else { + if (slot->state == BLINKINGOFF_STATE || + slot->state == BLINKINGON_STATE) + pciehp_handle_presence_or_link_change(slot, + events); + else { + /* + * When we are removing or rescanning through + * sysfs, suprise link down/up happens. So we + * will handle this event 3 seconds later. + */ + ctrl_info(ctrl, "Slot(%s): Surprise link down/up in remove or rescan process!\n", + slot_name(slot)); + atomic_long_set(&slot->work.work.data, + events & (PCI_EXP_SLTSTA_PDC | + PCI_EXP_SLTSTA_DLLSC)); + schedule_delayed_work(&slot->work, 3 * HZ); + } + } + } up_read(&ctrl->reset_lock); pci_config_pm_runtime_put(pdev); diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 9ecfe13157c0..f04d83031759 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -474,8 +474,16 @@ static ssize_t remove_store(struct device *dev, struct device_attribute *attr, if (kstrtoul(buf, 0, &val) < 0) return -EINVAL; + if (test_and_set_bit(0, &slot_being_removed_rescanned)) { + pr_info("Slot is being removed or rescanned, please try later!\n"); + return -EINVAL; + } + if (val && device_remove_file_self(dev, attr)) pci_stop_and_remove_bus_device_locked(to_pci_dev(dev)); + + slot_being_removed_rescanned = 0; + return count; } static struct device_attribute dev_remove_attr = __ATTR(remove, diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c index e9c6b120cf45..6d15ad0488d2 100644 --- a/drivers/pci/remove.c +++ b/drivers/pci/remove.c @@ -3,6 +3,11 @@ #include #include "pci.h" +/* + * When a slot is being removed/rescanned, this flag is set. + */ +unsigned long slot_being_removed_rescanned; + static void pci_free_resources(struct pci_dev *dev) { int i; diff --git a/include/linux/pci.h b/include/linux/pci.h index c7f62371c94a..3eeff0250dcf 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -913,6 +913,9 @@ extern struct bus_type pci_bus_type; /* Do NOT directly access these two variables, unless you are arch-specific PCI * code, or PCI core code. */ extern struct list_head pci_root_buses; /* List of all known PCI buses */ + +extern unsigned long slot_being_removed_rescanned; + /* Some device drivers need know if PCI is initiated */ int no_pci_devices(void); -- GitLab