提交 5adff6a0 编写于 作者: D Daniel Jurgens 提交者: David S. Miller

net/mlx5: Fix incorrect page count when in internal error

Change page cleanup flow when in internal error to properly decrement
the page counts when reclaiming pages.  The prevents timing out waiting
for extra pages that were actually cleaned up previously.

fixes: 89d44f0a ('net/mlx5_core: Add pci error handlers to mlx5_core driver')
Signed-off-by: NDaniel Jurgens <danielj@mellanox.com>
Signed-off-by: NSaeed Mahameed <saeedm@mellanox.com>
Signed-off-by: NDavid S. Miller <davem@davemloft.net>
上级 c1d4d2e9
...@@ -345,7 +345,6 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, ...@@ -345,7 +345,6 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
func_id, npages, err); func_id, npages, err);
goto out_4k; goto out_4k;
} }
dev->priv.fw_pages += npages;
err = mlx5_cmd_status_to_err(&out.hdr); err = mlx5_cmd_status_to_err(&out.hdr);
if (err) { if (err) {
...@@ -373,6 +372,33 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, ...@@ -373,6 +372,33 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
return err; return err;
} }
static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
struct mlx5_manage_pages_inbox *in, int in_size,
struct mlx5_manage_pages_outbox *out, int out_size)
{
struct fw_page *fwp;
struct rb_node *p;
u32 npages;
u32 i = 0;
if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)
return mlx5_cmd_exec_check_status(dev, (u32 *)in, in_size,
(u32 *)out, out_size);
npages = be32_to_cpu(in->num_entries);
p = rb_first(&dev->priv.page_root);
while (p && i < npages) {
fwp = rb_entry(p, struct fw_page, rb_node);
out->pas[i] = cpu_to_be64(fwp->addr);
p = rb_next(p);
i++;
}
out->num_entries = cpu_to_be32(i);
return 0;
}
static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
int *nclaimed) int *nclaimed)
{ {
...@@ -398,15 +424,9 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, ...@@ -398,15 +424,9 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
in.func_id = cpu_to_be16(func_id); in.func_id = cpu_to_be16(func_id);
in.num_entries = cpu_to_be32(npages); in.num_entries = cpu_to_be32(npages);
mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen); mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen);
err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen); err = reclaim_pages_cmd(dev, &in, sizeof(in), out, outlen);
if (err) { if (err) {
mlx5_core_err(dev, "failed reclaiming pages\n"); mlx5_core_err(dev, "failed reclaiming pages: err %d\n", err);
goto out_free;
}
dev->priv.fw_pages -= npages;
if (out->hdr.status) {
err = mlx5_cmd_status_to_err(&out->hdr);
goto out_free; goto out_free;
} }
...@@ -417,13 +437,15 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, ...@@ -417,13 +437,15 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
err = -EINVAL; err = -EINVAL;
goto out_free; goto out_free;
} }
if (nclaimed)
*nclaimed = num_claimed;
for (i = 0; i < num_claimed; i++) { for (i = 0; i < num_claimed; i++) {
addr = be64_to_cpu(out->pas[i]); addr = be64_to_cpu(out->pas[i]);
free_4k(dev, addr); free_4k(dev, addr);
} }
if (nclaimed)
*nclaimed = num_claimed;
dev->priv.fw_pages -= num_claimed; dev->priv.fw_pages -= num_claimed;
if (func_id) if (func_id)
dev->priv.vfs_pages -= num_claimed; dev->priv.vfs_pages -= num_claimed;
...@@ -514,14 +536,10 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) ...@@ -514,14 +536,10 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
p = rb_first(&dev->priv.page_root); p = rb_first(&dev->priv.page_root);
if (p) { if (p) {
fwp = rb_entry(p, struct fw_page, rb_node); fwp = rb_entry(p, struct fw_page, rb_node);
if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { err = reclaim_pages(dev, fwp->func_id,
free_4k(dev, fwp->addr); optimal_reclaimed_pages(),
nclaimed = 1; &nclaimed);
} else {
err = reclaim_pages(dev, fwp->func_id,
optimal_reclaimed_pages(),
&nclaimed);
}
if (err) { if (err) {
mlx5_core_warn(dev, "failed reclaiming pages (%d)\n", mlx5_core_warn(dev, "failed reclaiming pages (%d)\n",
err); err);
...@@ -536,6 +554,13 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev) ...@@ -536,6 +554,13 @@ int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
} }
} while (p); } while (p);
WARN(dev->priv.fw_pages,
"FW pages counter is %d after reclaiming all pages\n",
dev->priv.fw_pages);
WARN(dev->priv.vfs_pages,
"VFs FW pages counter is %d after reclaiming all pages\n",
dev->priv.vfs_pages);
return 0; return 0;
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册