提交 fc3d3db0 编写于 作者: M Moshe Shemesh 提交者: Saeed Mahameed

net/mlx5: Avoid double clear or set of sync reset requested

Double clear of reset requested state can lead to NULL pointer as it
will try to delete the timer twice. This can happen for example on a
race between abort from FW and pci error or reset. Avoid such case using
test_and_clear_bit() to verify only one time reset requested state clear
flow. Similarly use test_and_set_bit() to verify only one time reset
requested state set flow.

Fixes: 7dd6df32 ("net/mlx5: Handle sync reset abort event")
Signed-off-by: NMoshe Shemesh <moshe@nvidia.com>
Reviewed-by: NMaher Sanalla <msanalla@nvidia.com>
Reviewed-by: NShay Drory <shayd@nvidia.com>
Signed-off-by: NSaeed Mahameed <saeedm@nvidia.com>
上级 cb7786a7
......@@ -162,14 +162,19 @@ static void mlx5_stop_sync_reset_poll(struct mlx5_core_dev *dev)
del_timer_sync(&fw_reset->timer);
}
static void mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health)
static int mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool poll_health)
{
struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
if (!test_and_clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) {
mlx5_core_warn(dev, "Reset request was already cleared\n");
return -EALREADY;
}
mlx5_stop_sync_reset_poll(dev);
clear_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags);
if (poll_health)
mlx5_start_health_poll(dev);
return 0;
}
static void mlx5_sync_reset_reload_work(struct work_struct *work)
......@@ -229,13 +234,17 @@ static int mlx5_fw_reset_set_reset_sync_nack(struct mlx5_core_dev *dev)
return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL3, 0, 2, false);
}
static void mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev)
static int mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev)
{
struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset;
if (test_and_set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags)) {
mlx5_core_warn(dev, "Reset request was already set\n");
return -EALREADY;
}
mlx5_stop_health_poll(dev, true);
set_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags);
mlx5_start_sync_reset_poll(dev);
return 0;
}
static void mlx5_fw_live_patch_event(struct work_struct *work)
......@@ -264,7 +273,9 @@ static void mlx5_sync_reset_request_event(struct work_struct *work)
err ? "Failed" : "Sent");
return;
}
mlx5_sync_reset_set_reset_requested(dev);
if (mlx5_sync_reset_set_reset_requested(dev))
return;
err = mlx5_fw_reset_set_reset_sync_ack(dev);
if (err)
mlx5_core_warn(dev, "PCI Sync FW Update Reset Ack Failed. Error code: %d\n", err);
......@@ -362,7 +373,8 @@ static void mlx5_sync_reset_now_event(struct work_struct *work)
struct mlx5_core_dev *dev = fw_reset->dev;
int err;
mlx5_sync_reset_clear_reset_requested(dev, false);
if (mlx5_sync_reset_clear_reset_requested(dev, false))
return;
mlx5_core_warn(dev, "Sync Reset now. Device is going to reset.\n");
......@@ -391,10 +403,8 @@ static void mlx5_sync_reset_abort_event(struct work_struct *work)
reset_abort_work);
struct mlx5_core_dev *dev = fw_reset->dev;
if (!test_bit(MLX5_FW_RESET_FLAGS_RESET_REQUESTED, &fw_reset->reset_flags))
if (mlx5_sync_reset_clear_reset_requested(dev, true))
return;
mlx5_sync_reset_clear_reset_requested(dev, true);
mlx5_core_warn(dev, "PCI Sync FW Update Reset Aborted.\n");
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册