提交 762c10db 编写于 作者: X Xiongfeng Wang 提交者: Xie XiuQi

pciehp: fix a race between pciehp and removing operations by sysfs

hulk inclusion
category: bugfix
bugzilla: NA
CVE: NA

-------------------------------------------------

When I run a stress test about pcie hotplug and removing operations by
sysfs, I got a hange task, and the following call trace is printed.

 INFO: task irq/746-pciehp:41551 blocked for more than 120 seconds.
       Tainted: P        W  OE     4.19.25-
 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
 irq/746-pciehp  D    0 41551      2 0x00000228
 Call trace:
  __switch_to+0x94/0xe8
  __schedule+0x270/0x8b0
  schedule+0x2c/0x88
  schedule_preempt_disabled+0x14/0x20
  __mutex_lock.isra.1+0x1fc/0x540
  __mutex_lock_slowpath+0x24/0x30
  mutex_lock+0x80/0xa8
  pci_lock_rescan_remove+0x20/0x28
  pciehp_configure_device+0x30/0x140
  pciehp_handle_presence_or_link_change+0x35c/0x4b0
  pciehp_ist+0x1cc/0x1d0
  irq_thread_fn+0x30/0x80
  irq_thread+0x128/0x200
  kthread+0x134/0x138
  ret_from_fork+0x10/0x18
 INFO: task bash:6424 blocked for more than 120 seconds.
       Tainted: P        W  OE     4.19.25-
 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
 bash            D    0  6424   2231 0x00000200
 Call trace:
  __switch_to+0x94/0xe8
  __schedule+0x270/0x8b0
  schedule+0x2c/0x88
  schedule_timeout+0x224/0x448
  wait_for_common+0x198/0x2a0
  wait_for_completion+0x28/0x38
  kthread_stop+0x60/0x190
  __free_irq+0x1c0/0x348
  free_irq+0x40/0x88
  pcie_shutdown_notification+0x54/0x80
  pciehp_remove+0x30/0x50
  pcie_port_remove_service+0x3c/0x58
  device_release_driver_internal+0x1b4/0x250
  device_release_driver+0x28/0x38
  bus_remove_device+0xd4/0x160
  device_del+0x128/0x348
  device_unregister+0x24/0x78
  remove_iter+0x48/0x58
  device_for_each_child+0x6c/0xb8
  pcie_port_device_remove+0x2c/0x48
  pcie_portdrv_remove+0x5c/0x68
  pci_device_remove+0x48/0xd8
  device_release_driver_internal+0x1b4/0x250
  device_release_driver+0x28/0x38
  pci_stop_bus_device+0x84/0xb8
  pci_stop_and_remove_bus_device_locked+0x24/0x40
  remove_store+0xa4/0xb8
  dev_attr_store+0x44/0x60
  sysfs_kf_write+0x58/0x80
  kernfs_fop_write+0xe8/0x1f0
  __vfs_write+0x60/0x190
  vfs_write+0xac/0x1c0
  ksys_write+0x6c/0xd8
  __arm64_sys_write+0x24/0x30
  el0_svc_common+0xa0/0x180
  el0_svc_handler+0x38/0x78
  el0_svc+0x8/0xc

When we remove a slot by sysfs.
'pci_stop_and_remove_bus_device_locked()' will be called. This function
will get the global mutex lock 'pci_rescan_remove_lock', and remove the
slot. If the irq thread 'pciehp_ist' is still running, we will wait
until it exits.

If a pciehp interrupt happens immediately after we remove the slot by
sysfs, but before we free the pciehp irq in
'pci_stop_and_remove_bus_device_locked()'. 'pciehp_ist' will hung
because the global mutex lock 'pci_rescan_remove_lock' is held by the
sysfs operation. But the sysfs operation is waiting for the pciehp irq
thread 'pciehp_ist' ends. Then a hung task occurs.

So this two kinds of operation, removing through attention buttion and
removing through /sys/devices/pci***/remove, should not be excuted at
the same time. This patch add a global variable to mark that one of these
operations is under processing. When this variable is set,  if another
operation is requested, it will be rejected.
Signed-off-by: NXiongfeng Wang <wangxiongfeng2@huawei.com>
Reviewed-by: NHanjun Guo <guohanjun@huawei.com>
Signed-off-by: NYang Yingliang <yangyingliang@huawei.com>
上级 541d167d
...@@ -145,6 +145,8 @@ void pciehp_queue_pushbutton_work(struct work_struct *work) ...@@ -145,6 +145,8 @@ void pciehp_queue_pushbutton_work(struct work_struct *work)
{ {
struct slot *p_slot = container_of(work, struct slot, work.work); struct slot *p_slot = container_of(work, struct slot, work.work);
struct controller *ctrl = p_slot->ctrl; struct controller *ctrl = p_slot->ctrl;
int events = atomic_long_read(&work->data) & (PCI_EXP_SLTSTA_PDC |
PCI_EXP_SLTSTA_DLLSC | DISABLE_SLOT);
mutex_lock(&p_slot->lock); mutex_lock(&p_slot->lock);
switch (p_slot->state) { switch (p_slot->state) {
...@@ -155,6 +157,12 @@ void pciehp_queue_pushbutton_work(struct work_struct *work) ...@@ -155,6 +157,12 @@ void pciehp_queue_pushbutton_work(struct work_struct *work)
pciehp_request(ctrl, PCI_EXP_SLTSTA_PDC); pciehp_request(ctrl, PCI_EXP_SLTSTA_PDC);
break; break;
default: default:
if (events) {
atomic_or(events, &ctrl->pending_events);
if (!pciehp_poll_mode)
irq_wake_thread(ctrl->pcie->irq, ctrl);
} else
slot_being_removed_rescanned = 0;
break; break;
} }
mutex_unlock(&p_slot->lock); mutex_unlock(&p_slot->lock);
...@@ -180,6 +188,7 @@ void pciehp_handle_button_press(struct slot *p_slot) ...@@ -180,6 +188,7 @@ void pciehp_handle_button_press(struct slot *p_slot)
/* blink green LED and turn off amber */ /* blink green LED and turn off amber */
pciehp_green_led_blink(p_slot); pciehp_green_led_blink(p_slot);
pciehp_set_attention_status(p_slot, 0); pciehp_set_attention_status(p_slot, 0);
atomic_long_set(&p_slot->work.work.data, 0);
schedule_delayed_work(&p_slot->work, 5 * HZ); schedule_delayed_work(&p_slot->work, 5 * HZ);
break; break;
case BLINKINGOFF_STATE: case BLINKINGOFF_STATE:
...@@ -201,10 +210,12 @@ void pciehp_handle_button_press(struct slot *p_slot) ...@@ -201,10 +210,12 @@ void pciehp_handle_button_press(struct slot *p_slot)
pciehp_set_attention_status(p_slot, 0); pciehp_set_attention_status(p_slot, 0);
ctrl_info(ctrl, "Slot(%s): Action canceled due to button press\n", ctrl_info(ctrl, "Slot(%s): Action canceled due to button press\n",
slot_name(p_slot)); slot_name(p_slot));
slot_being_removed_rescanned = 0;
break; break;
default: default:
ctrl_err(ctrl, "Slot(%s): Ignoring invalid state %#x\n", ctrl_err(ctrl, "Slot(%s): Ignoring invalid state %#x\n",
slot_name(p_slot), p_slot->state); slot_name(p_slot), p_slot->state);
slot_being_removed_rescanned = 0;
break; break;
} }
mutex_unlock(&p_slot->lock); mutex_unlock(&p_slot->lock);
...@@ -225,6 +236,7 @@ void pciehp_handle_disable_request(struct slot *slot) ...@@ -225,6 +236,7 @@ void pciehp_handle_disable_request(struct slot *slot)
mutex_unlock(&slot->lock); mutex_unlock(&slot->lock);
ctrl->request_result = pciehp_disable_slot(slot, SAFE_REMOVAL); ctrl->request_result = pciehp_disable_slot(slot, SAFE_REMOVAL);
slot_being_removed_rescanned = 0;
} }
void pciehp_handle_presence_or_link_change(struct slot *slot, u32 events) void pciehp_handle_presence_or_link_change(struct slot *slot, u32 events)
...@@ -274,6 +286,7 @@ void pciehp_handle_presence_or_link_change(struct slot *slot, u32 events) ...@@ -274,6 +286,7 @@ void pciehp_handle_presence_or_link_change(struct slot *slot, u32 events)
link_active = pciehp_check_link_active(ctrl); link_active = pciehp_check_link_active(ctrl);
if (!present && !link_active) { if (!present && !link_active) {
mutex_unlock(&slot->lock); mutex_unlock(&slot->lock);
slot_being_removed_rescanned = 0;
return; return;
} }
...@@ -296,6 +309,7 @@ void pciehp_handle_presence_or_link_change(struct slot *slot, u32 events) ...@@ -296,6 +309,7 @@ void pciehp_handle_presence_or_link_change(struct slot *slot, u32 events)
mutex_unlock(&slot->lock); mutex_unlock(&slot->lock);
break; break;
} }
slot_being_removed_rescanned = 0;
} }
static int __pciehp_enable_slot(struct slot *p_slot) static int __pciehp_enable_slot(struct slot *p_slot)
......
...@@ -659,7 +659,17 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id) ...@@ -659,7 +659,17 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id)
if (events & PCI_EXP_SLTSTA_ABP) { if (events & PCI_EXP_SLTSTA_ABP) {
ctrl_info(ctrl, "Slot(%s): Attention button pressed\n", ctrl_info(ctrl, "Slot(%s): Attention button pressed\n",
slot_name(slot)); slot_name(slot));
pciehp_handle_button_press(slot); if (!test_and_set_bit(0, &slot_being_removed_rescanned))
pciehp_handle_button_press(slot);
else {
if (slot->state == BLINKINGOFF_STATE ||
slot->state == BLINKINGON_STATE)
pciehp_handle_button_press(slot);
else
ctrl_info(ctrl, "Slot(%s): Slot operation failed because a remove or"
" rescan operation is under processing, please try later!\n",
slot_name(slot));
}
} }
/* Check Power Fault Detected */ /* Check Power Fault Detected */
...@@ -675,10 +685,46 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id) ...@@ -675,10 +685,46 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id)
* or Data Link Layer State Changed events. * or Data Link Layer State Changed events.
*/ */
down_read(&ctrl->reset_lock); down_read(&ctrl->reset_lock);
if (events & DISABLE_SLOT) if (events & DISABLE_SLOT) {
pciehp_handle_disable_request(slot); if (!test_and_set_bit(0, &slot_being_removed_rescanned))
else if (events & (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC)) pciehp_handle_disable_request(slot);
pciehp_handle_presence_or_link_change(slot, events); else {
if (slot->state == BLINKINGOFF_STATE ||
slot->state == BLINKINGON_STATE)
pciehp_handle_disable_request(slot);
else {
/*
* we use the work_struct private data to store
* the event type
*/
atomic_long_set(&slot->work.work.data,
DISABLE_SLOT);
schedule_delayed_work(&slot->work, 3 * HZ);
}
}
} else if (events & (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC)) {
if (!test_and_set_bit(0, &slot_being_removed_rescanned))
pciehp_handle_presence_or_link_change(slot, events);
else {
if (slot->state == BLINKINGOFF_STATE ||
slot->state == BLINKINGON_STATE)
pciehp_handle_presence_or_link_change(slot,
events);
else {
/*
* When we are removing or rescanning through
* sysfs, suprise link down/up happens. So we
* will handle this event 3 seconds later.
*/
ctrl_info(ctrl, "Slot(%s): Surprise link down/up in remove or rescan process!\n",
slot_name(slot));
atomic_long_set(&slot->work.work.data,
events & (PCI_EXP_SLTSTA_PDC |
PCI_EXP_SLTSTA_DLLSC));
schedule_delayed_work(&slot->work, 3 * HZ);
}
}
}
up_read(&ctrl->reset_lock); up_read(&ctrl->reset_lock);
pci_config_pm_runtime_put(pdev); pci_config_pm_runtime_put(pdev);
......
...@@ -474,8 +474,16 @@ static ssize_t remove_store(struct device *dev, struct device_attribute *attr, ...@@ -474,8 +474,16 @@ static ssize_t remove_store(struct device *dev, struct device_attribute *attr,
if (kstrtoul(buf, 0, &val) < 0) if (kstrtoul(buf, 0, &val) < 0)
return -EINVAL; return -EINVAL;
if (test_and_set_bit(0, &slot_being_removed_rescanned)) {
pr_info("Slot is being removed or rescanned, please try later!\n");
return -EINVAL;
}
if (val && device_remove_file_self(dev, attr)) if (val && device_remove_file_self(dev, attr))
pci_stop_and_remove_bus_device_locked(to_pci_dev(dev)); pci_stop_and_remove_bus_device_locked(to_pci_dev(dev));
slot_being_removed_rescanned = 0;
return count; return count;
} }
static struct device_attribute dev_remove_attr = __ATTR(remove, static struct device_attribute dev_remove_attr = __ATTR(remove,
......
...@@ -3,6 +3,11 @@ ...@@ -3,6 +3,11 @@
#include <linux/module.h> #include <linux/module.h>
#include "pci.h" #include "pci.h"
/*
* When a slot is being removed/rescanned, this flag is set.
*/
unsigned long slot_being_removed_rescanned;
static void pci_free_resources(struct pci_dev *dev) static void pci_free_resources(struct pci_dev *dev)
{ {
int i; int i;
......
...@@ -913,6 +913,9 @@ extern struct bus_type pci_bus_type; ...@@ -913,6 +913,9 @@ extern struct bus_type pci_bus_type;
/* Do NOT directly access these two variables, unless you are arch-specific PCI /* Do NOT directly access these two variables, unless you are arch-specific PCI
* code, or PCI core code. */ * code, or PCI core code. */
extern struct list_head pci_root_buses; /* List of all known PCI buses */ extern struct list_head pci_root_buses; /* List of all known PCI buses */
extern unsigned long slot_being_removed_rescanned;
/* Some device drivers need know if PCI is initiated */ /* Some device drivers need know if PCI is initiated */
int no_pci_devices(void); int no_pci_devices(void);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册