提交 b07042ca 编写于 作者: L Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma fixes from Doug Ledford:
 "Here's our second -rc pull request. Nothing particularly special in
  this one. The client removal deadlock fix is kind of tricky, but we had
  multiple eyes on it and no one could find a fault in it. A couple
  Spectre V1 fixes too. Otherwise, all just normal -rc fodder:

   - A couple Spectre V1 fixes (umad, hfi1)

   - Fix a tricky deadlock in the rdma core code with refcounting
     instead of locks (client removal patches)

   - Build errors (hns)

   - Fix a scheduling while atomic issue (mlx5)

   - Use after free fix (mad)

   - Fix error path return code (hns)

   - Null deref fix (siw_crypto_hash)

   - A few other misc. minor fixes"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  RDMA/hns: Fix error return code in hns_roce_v1_rsv_lp_qp()
  RDMA/mlx5: Release locks during notifier unregister
  IB/hfi1: Fix Spectre v1 vulnerability
  IB/mad: Fix use-after-free in ib mad completion handling
  RDMA/restrack: Track driver QP types in resource tracker
  IB/mlx5: Fix MR registration flow to use UMR properly
  RDMA/devices: Remove the lock around remove_client_context
  RDMA/devices: Do not deadlock during client removal
  IB/core: Add mitigation for Spectre V1
  Do not dereference 'siw_crypto_shash' before checking
  RDMA/qedr: Fix the hca_type and hca_rev returned in device attributes
  RDMA/hns: Fix build error
......@@ -302,7 +302,9 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
struct ib_udata *udata,
struct ib_uobject *uobj)
{
enum ib_qp_type qp_type = attr->qp_type;
struct ib_qp *qp;
bool is_xrc;
if (!dev->ops.create_qp)
return ERR_PTR(-EOPNOTSUPP);
......@@ -320,7 +322,8 @@ static inline struct ib_qp *_ib_create_qp(struct ib_device *dev,
* and more importantly they are created internally by driver,
* see mlx5 create_dev_resources() as an example.
*/
if (attr->qp_type < IB_QPT_XRC_INI) {
is_xrc = qp_type == IB_QPT_XRC_INI || qp_type == IB_QPT_XRC_TGT;
if ((qp_type < IB_QPT_MAX && !is_xrc) || qp_type == IB_QPT_DRIVER) {
qp->res.type = RDMA_RESTRACK_QP;
if (uobj)
rdma_restrack_uadd(&qp->res);
......
......@@ -94,11 +94,17 @@ static DEFINE_XARRAY_FLAGS(devices, XA_FLAGS_ALLOC);
static DECLARE_RWSEM(devices_rwsem);
#define DEVICE_REGISTERED XA_MARK_1
static LIST_HEAD(client_list);
static u32 highest_client_id;
#define CLIENT_REGISTERED XA_MARK_1
static DEFINE_XARRAY_FLAGS(clients, XA_FLAGS_ALLOC);
static DECLARE_RWSEM(clients_rwsem);
/*
 * Drop one reference on client->uses. When that drop takes the count to
 * zero, signal the client->uses_zero completion.
 */
static void ib_client_put(struct ib_client *client)
{
if (refcount_dec_and_test(&client->uses))
complete(&client->uses_zero);
}
/*
* If client_data is registered then the corresponding client must also still
* be registered.
......@@ -660,6 +666,14 @@ static int add_client_context(struct ib_device *device,
return 0;
down_write(&device->client_data_rwsem);
/*
* So long as the client is registered hold both the client and device
* unregistration locks.
*/
if (!refcount_inc_not_zero(&client->uses))
goto out_unlock;
refcount_inc(&device->refcount);
/*
* Another caller to add_client_context got here first and has already
* completely initialized context.
......@@ -683,6 +697,9 @@ static int add_client_context(struct ib_device *device,
return 0;
out:
ib_device_put(device);
ib_client_put(client);
out_unlock:
up_write(&device->client_data_rwsem);
return ret;
}
......@@ -702,7 +719,7 @@ static void remove_client_context(struct ib_device *device,
client_data = xa_load(&device->client_data, client_id);
xa_clear_mark(&device->client_data, client_id, CLIENT_DATA_REGISTERED);
client = xa_load(&clients, client_id);
downgrade_write(&device->client_data_rwsem);
up_write(&device->client_data_rwsem);
/*
* Notice we cannot be holding any exclusive locks when calling the
......@@ -712,17 +729,13 @@ static void remove_client_context(struct ib_device *device,
*
* For this reason clients and drivers should not call the
* unregistration functions while holding any locks.
*
* It tempting to drop the client_data_rwsem too, but this is required
* to ensure that unregister_client does not return until all clients
* are completely unregistered, which is required to avoid module
* unloading races.
*/
if (client->remove)
client->remove(device, client_data);
xa_erase(&device->client_data, client_id);
up_read(&device->client_data_rwsem);
ib_device_put(device);
ib_client_put(client);
}
static int alloc_port_data(struct ib_device *device)
......@@ -1224,7 +1237,7 @@ static int setup_device(struct ib_device *device)
static void disable_device(struct ib_device *device)
{
struct ib_client *client;
u32 cid;
WARN_ON(!refcount_read(&device->refcount));
......@@ -1232,10 +1245,19 @@ static void disable_device(struct ib_device *device)
xa_clear_mark(&devices, device->index, DEVICE_REGISTERED);
up_write(&devices_rwsem);
/*
* Remove clients in LIFO order, see assign_client_id. This could be
* more efficient if xarray learns to reverse iterate. Since no new
* clients can be added to this ib_device past this point we only need
* the maximum possible client_id value here.
*/
down_read(&clients_rwsem);
list_for_each_entry_reverse(client, &client_list, list)
remove_client_context(device, client->client_id);
cid = highest_client_id;
up_read(&clients_rwsem);
while (cid) {
cid--;
remove_client_context(device, cid);
}
/* Pairs with refcount_set in enable_device */
ib_device_put(device);
......@@ -1662,30 +1684,31 @@ static int assign_client_id(struct ib_client *client)
/*
* The add/remove callbacks must be called in FIFO/LIFO order. To
* achieve this we assign client_ids so they are sorted in
* registration order, and retain a linked list we can reverse iterate
* to get the LIFO order. The extra linked list can go away if xarray
* learns to reverse iterate.
* registration order.
*/
if (list_empty(&client_list)) {
client->client_id = 0;
} else {
struct ib_client *last;
last = list_last_entry(&client_list, struct ib_client, list);
client->client_id = last->client_id + 1;
}
client->client_id = highest_client_id;
ret = xa_insert(&clients, client->client_id, client, GFP_KERNEL);
if (ret)
goto out;
highest_client_id++;
xa_set_mark(&clients, client->client_id, CLIENT_REGISTERED);
list_add_tail(&client->list, &client_list);
out:
up_write(&clients_rwsem);
return ret;
}
/*
 * Erase the client's entry from the clients xarray, then lower
 * highest_client_id past any trailing ids that no longer have an entry.
 * Both steps run with clients_rwsem held for write.
 */
static void remove_client_id(struct ib_client *client)
{
down_write(&clients_rwsem);
xa_erase(&clients, client->client_id);
/* Stop as soon as the slot just below highest_client_id is occupied, or at zero. */
for (; highest_client_id; highest_client_id--)
if (xa_load(&clients, highest_client_id - 1))
break;
up_write(&clients_rwsem);
}
/**
* ib_register_client - Register an IB client
* @client:Client to register
......@@ -1705,6 +1728,8 @@ int ib_register_client(struct ib_client *client)
unsigned long index;
int ret;
refcount_set(&client->uses, 1);
init_completion(&client->uses_zero);
ret = assign_client_id(client);
if (ret)
return ret;
......@@ -1740,21 +1765,30 @@ void ib_unregister_client(struct ib_client *client)
unsigned long index;
down_write(&clients_rwsem);
ib_client_put(client);
xa_clear_mark(&clients, client->client_id, CLIENT_REGISTERED);
up_write(&clients_rwsem);
/*
* Every device still known must be serialized to make sure we are
* done with the client callbacks before we return.
*/
down_read(&devices_rwsem);
xa_for_each (&devices, index, device)
/* We do not want to have locks while calling client->remove() */
rcu_read_lock();
xa_for_each (&devices, index, device) {
if (!ib_device_try_get(device))
continue;
rcu_read_unlock();
remove_client_context(device, client->client_id);
up_read(&devices_rwsem);
down_write(&clients_rwsem);
list_del(&client->list);
xa_erase(&clients, client->client_id);
up_write(&clients_rwsem);
ib_device_put(device);
rcu_read_lock();
}
rcu_read_unlock();
/*
* remove_client_context() is not a fence, it can return even though a
* removal is ongoing. Wait until all removals are completed.
*/
wait_for_completion(&client->uses_zero);
remove_client_id(client);
}
EXPORT_SYMBOL(ib_unregister_client);
......
......@@ -3224,18 +3224,18 @@ static int ib_mad_port_open(struct ib_device *device,
if (has_smi)
cq_size *= 2;
port_priv->pd = ib_alloc_pd(device, 0);
if (IS_ERR(port_priv->pd)) {
dev_err(&device->dev, "Couldn't create ib_mad PD\n");
ret = PTR_ERR(port_priv->pd);
goto error3;
}
port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0,
IB_POLL_UNBOUND_WORKQUEUE);
if (IS_ERR(port_priv->cq)) {
dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
ret = PTR_ERR(port_priv->cq);
goto error3;
}
port_priv->pd = ib_alloc_pd(device, 0);
if (IS_ERR(port_priv->pd)) {
dev_err(&device->dev, "Couldn't create ib_mad PD\n");
ret = PTR_ERR(port_priv->pd);
goto error4;
}
......@@ -3278,11 +3278,11 @@ static int ib_mad_port_open(struct ib_device *device,
error7:
destroy_mad_qp(&port_priv->qp_info[0]);
error6:
ib_dealloc_pd(port_priv->pd);
error4:
ib_free_cq(port_priv->cq);
cleanup_recv_queue(&port_priv->qp_info[1]);
cleanup_recv_queue(&port_priv->qp_info[0]);
error4:
ib_dealloc_pd(port_priv->pd);
error3:
kfree(port_priv);
......@@ -3312,8 +3312,8 @@ static int ib_mad_port_close(struct ib_device *device, int port_num)
destroy_workqueue(port_priv->wq);
destroy_mad_qp(&port_priv->qp_info[1]);
destroy_mad_qp(&port_priv->qp_info[0]);
ib_dealloc_pd(port_priv->pd);
ib_free_cq(port_priv->cq);
ib_dealloc_pd(port_priv->pd);
cleanup_recv_queue(&port_priv->qp_info[1]);
cleanup_recv_queue(&port_priv->qp_info[0]);
/* XXX: Handle deallocation of MAD registration tables */
......
......@@ -49,6 +49,7 @@
#include <linux/sched.h>
#include <linux/semaphore.h>
#include <linux/slab.h>
#include <linux/nospec.h>
#include <linux/uaccess.h>
......@@ -884,11 +885,14 @@ static int ib_umad_unreg_agent(struct ib_umad_file *file, u32 __user *arg)
if (get_user(id, arg))
return -EFAULT;
if (id >= IB_UMAD_MAX_AGENTS)
return -EINVAL;
mutex_lock(&file->port->file_mutex);
mutex_lock(&file->mutex);
if (id >= IB_UMAD_MAX_AGENTS || !__get_agent(file, id)) {
id = array_index_nospec(id, IB_UMAD_MAX_AGENTS);
if (!__get_agent(file, id)) {
ret = -EINVAL;
goto out;
}
......
......@@ -54,6 +54,7 @@
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <rdma/opa_addr.h>
#include <linux/nospec.h>
#include "hfi.h"
#include "common.h"
......@@ -1536,6 +1537,7 @@ static int hfi1_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr)
sl = rdma_ah_get_sl(ah_attr);
if (sl >= ARRAY_SIZE(ibp->sl_to_sc))
return -EINVAL;
sl = array_index_nospec(sl, ARRAY_SIZE(ibp->sl_to_sc));
sc5 = ibp->sl_to_sc[sl];
if (sc_to_vlt(dd, sc5) > num_vls && sc_to_vlt(dd, sc5) != 0xf)
......
# SPDX-License-Identifier: GPL-2.0-only
config INFINIBAND_HNS
tristate "HNS RoCE Driver"
bool "HNS RoCE Driver"
depends on NET_VENDOR_HISILICON
depends on ARM64 || (COMPILE_TEST && 64BIT)
---help---
......@@ -11,7 +11,7 @@ config INFINIBAND_HNS
To compile HIP06 or HIP08 driver as module, choose M here.
config INFINIBAND_HNS_HIP06
bool "Hisilicon Hip06 Family RoCE support"
tristate "Hisilicon Hip06 Family RoCE support"
depends on INFINIBAND_HNS && HNS && HNS_DSAF && HNS_ENET
---help---
RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip06 and
......@@ -21,7 +21,7 @@ config INFINIBAND_HNS_HIP06
module will be called hns-roce-hw-v1
config INFINIBAND_HNS_HIP08
bool "Hisilicon Hip08 Family RoCE support"
tristate "Hisilicon Hip08 Family RoCE support"
depends on INFINIBAND_HNS && PCI && HNS3
---help---
RoCE driver support for Hisilicon RoCE engine in Hisilicon Hip08 SoC.
......
......@@ -9,12 +9,8 @@ hns-roce-objs := hns_roce_main.o hns_roce_cmd.o hns_roce_pd.o \
hns_roce_ah.o hns_roce_hem.o hns_roce_mr.o hns_roce_qp.o \
hns_roce_cq.o hns_roce_alloc.o hns_roce_db.o hns_roce_srq.o hns_roce_restrack.o
ifdef CONFIG_INFINIBAND_HNS_HIP06
hns-roce-hw-v1-objs := hns_roce_hw_v1.o $(hns-roce-objs)
obj-$(CONFIG_INFINIBAND_HNS) += hns-roce-hw-v1.o
endif
obj-$(CONFIG_INFINIBAND_HNS_HIP06) += hns-roce-hw-v1.o
ifdef CONFIG_INFINIBAND_HNS_HIP08
hns-roce-hw-v2-objs := hns_roce_hw_v2.o hns_roce_hw_v2_dfx.o $(hns-roce-objs)
obj-$(CONFIG_INFINIBAND_HNS) += hns-roce-hw-v2.o
endif
obj-$(CONFIG_INFINIBAND_HNS_HIP08) += hns-roce-hw-v2.o
......@@ -750,8 +750,10 @@ static int hns_roce_v1_rsv_lp_qp(struct hns_roce_dev *hr_dev)
atomic_set(&free_mr->mr_free_cq->ib_cq.usecnt, 0);
pd = rdma_zalloc_drv_obj(ibdev, ib_pd);
if (!pd)
if (!pd) {
ret = -ENOMEM;
goto alloc_mem_failed;
}
pd->device = ibdev;
ret = hns_roce_alloc_pd(pd, NULL);
......
......@@ -5802,13 +5802,12 @@ static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
return;
}
if (mpi->mdev_events.notifier_call)
mlx5_notifier_unregister(mpi->mdev, &mpi->mdev_events);
mpi->mdev_events.notifier_call = NULL;
mpi->ibdev = NULL;
spin_unlock(&port->mp.mpi_lock);
if (mpi->mdev_events.notifier_call)
mlx5_notifier_unregister(mpi->mdev, &mpi->mdev_events);
mpi->mdev_events.notifier_call = NULL;
mlx5_remove_netdev_notifier(ibdev, port_num);
spin_lock(&port->mp.mpi_lock);
......
......@@ -51,22 +51,12 @@ static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static int mr_cache_max_order(struct mlx5_ib_dev *dev);
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static bool umr_can_modify_entity_size(struct mlx5_ib_dev *dev)
{
return !MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled);
}
static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
{
return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
}
static bool use_umr(struct mlx5_ib_dev *dev, int order)
{
return order <= mr_cache_max_order(dev) &&
umr_can_modify_entity_size(dev);
}
static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
......@@ -1271,7 +1261,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
struct mlx5_ib_mr *mr = NULL;
bool populate_mtts = false;
bool use_umr;
struct ib_umem *umem;
int page_shift;
int npages;
......@@ -1303,29 +1293,30 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (err < 0)
return ERR_PTR(err);
if (use_umr(dev, order)) {
use_umr = !MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled) &&
(!MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled) ||
!MLX5_CAP_GEN(dev->mdev, atomic));
if (order <= mr_cache_max_order(dev) && use_umr) {
mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont,
page_shift, order, access_flags);
if (PTR_ERR(mr) == -EAGAIN) {
mlx5_ib_dbg(dev, "cache empty for order %d\n", order);
mr = NULL;
}
populate_mtts = false;
} else if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) {
if (access_flags & IB_ACCESS_ON_DEMAND) {
err = -EINVAL;
pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n");
goto error;
}
populate_mtts = true;
use_umr = false;
}
if (!mr) {
if (!umr_can_modify_entity_size(dev))
populate_mtts = true;
mutex_lock(&dev->slow_path_mutex);
mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
page_shift, access_flags, populate_mtts);
page_shift, access_flags, !use_umr);
mutex_unlock(&dev->slow_path_mutex);
}
......@@ -1341,7 +1332,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
update_odp_mr(mr);
if (!populate_mtts) {
if (use_umr) {
int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE;
if (access_flags & IB_ACCESS_ON_DEMAND)
......
......@@ -125,14 +125,20 @@ static ssize_t hw_rev_show(struct device *device, struct device_attribute *attr,
struct qedr_dev *dev =
rdma_device_to_drv_device(device, struct qedr_dev, ibdev);
return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->pdev->vendor);
return scnprintf(buf, PAGE_SIZE, "0x%x\n", dev->attr.hw_ver);
}
static DEVICE_ATTR_RO(hw_rev);
static ssize_t hca_type_show(struct device *device,
struct device_attribute *attr, char *buf)
{
return scnprintf(buf, PAGE_SIZE, "%s\n", "HCA_TYPE_TO_SET");
struct qedr_dev *dev =
rdma_device_to_drv_device(device, struct qedr_dev, ibdev);
return scnprintf(buf, PAGE_SIZE, "FastLinQ QL%x %s\n",
dev->pdev->device,
rdma_protocol_iwarp(&dev->ibdev, 1) ?
"iWARP" : "RoCE");
}
static DEVICE_ATTR_RO(hca_type);
......
......@@ -220,12 +220,14 @@ static int siw_qp_enable_crc(struct siw_qp *qp)
{
struct siw_rx_stream *c_rx = &qp->rx_stream;
struct siw_iwarp_tx *c_tx = &qp->tx_ctx;
int size = crypto_shash_descsize(siw_crypto_shash) +
sizeof(struct shash_desc);
int size;
if (siw_crypto_shash == NULL)
return -ENOENT;
size = crypto_shash_descsize(siw_crypto_shash) +
sizeof(struct shash_desc);
c_tx->mpa_crc_hd = kzalloc(size, GFP_KERNEL);
c_rx->mpa_crc_hd = kzalloc(size, GFP_KERNEL);
if (!c_tx->mpa_crc_hd || !c_rx->mpa_crc_hd) {
......
......@@ -2647,7 +2647,9 @@ struct ib_client {
const union ib_gid *gid,
const struct sockaddr *addr,
void *client_data);
struct list_head list;
refcount_t uses;
struct completion uses_zero;
u32 client_id;
/* kverbs are not required by the client */
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册