提交 c741e491 编写于 作者: L Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma fixes from Jason Gunthorpe:
 "Quite a few small bug fixes old and new, also Doug Ledford is retiring
  now, we thank him for his work. Details:

   - Use after free in rxe

   - mlx5 DM regression

   - hns bugs triggred by device reset

   - Two fixes for CONFIG_DEBUG_PREEMPT

   - Several longstanding corner case bugs in hfi1

   - Two irdma data path bugs in rare cases and some memory issues"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  RDMA/irdma: Don't arm the CQ more than two times if no CE for this CQ
  RDMA/irdma: Report correct WC errors
  RDMA/irdma: Fix a potential memory allocation issue in 'irdma_prm_add_pble_mem()'
  RDMA/irdma: Fix a user-after-free in add_pble_prm
  IB/hfi1: Fix leak of rcvhdrtail_dummy_kvaddr
  IB/hfi1: Fix early init panic
  IB/hfi1: Insure use of smp_processor_id() is preempt disabled
  IB/hfi1: Correct guard on eager buffer deallocation
  RDMA/rtrs: Call {get,put}_cpu_ptr to silence a debug kernel warning
  RDMA/hns: Do not destroy QP resources in the hw resetting phase
  RDMA/hns: Do not halt commands during reset until later
  Remove Doug Ledford from MAINTAINERS
  RDMA/mlx5: Fix releasing unallocated memory in dereg MR flow
  RDMA: Fix use-after-free in rxe_queue_cleanup
......@@ -9329,7 +9329,6 @@ S: Maintained
F: drivers/iio/pressure/dps310.c
INFINIBAND SUBSYSTEM
M: Doug Ledford <dledford@redhat.com>
M: Jason Gunthorpe <jgg@nvidia.com>
L: linux-rdma@vger.kernel.org
S: Supported
......
......@@ -8415,6 +8415,8 @@ static void receive_interrupt_common(struct hfi1_ctxtdata *rcd)
*/
static void __hfi1_rcd_eoi_intr(struct hfi1_ctxtdata *rcd)
{
if (!rcd->rcvhdrq)
return;
clear_recv_intr(rcd);
if (check_packet_present(rcd))
force_recv_intr(rcd);
......
......@@ -1012,6 +1012,8 @@ int handle_receive_interrupt(struct hfi1_ctxtdata *rcd, int thread)
struct hfi1_packet packet;
int skip_pkt = 0;
if (!rcd->rcvhdrq)
return RCV_PKT_OK;
/* Control context will always use the slow path interrupt handler */
needset = (rcd->ctxt == HFI1_CTRL_CTXT) ? 0 : 1;
......
......@@ -113,7 +113,6 @@ static int hfi1_create_kctxt(struct hfi1_devdata *dd,
rcd->fast_handler = get_dma_rtail_setting(rcd) ?
handle_receive_interrupt_dma_rtail :
handle_receive_interrupt_nodma_rtail;
rcd->slow_handler = handle_receive_interrupt;
hfi1_set_seq_cnt(rcd, 1);
......@@ -334,6 +333,8 @@ int hfi1_create_ctxtdata(struct hfi1_pportdata *ppd, int numa,
rcd->numa_id = numa;
rcd->rcv_array_groups = dd->rcv_entries.ngroups;
rcd->rhf_rcv_function_map = normal_rhf_rcv_functions;
rcd->slow_handler = handle_receive_interrupt;
rcd->do_interrupt = rcd->slow_handler;
rcd->msix_intr = CCE_NUM_MSIX_VECTORS;
mutex_init(&rcd->exp_mutex);
......@@ -874,18 +875,6 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
if (ret)
goto done;
/* allocate dummy tail memory for all receive contexts */
dd->rcvhdrtail_dummy_kvaddr = dma_alloc_coherent(&dd->pcidev->dev,
sizeof(u64),
&dd->rcvhdrtail_dummy_dma,
GFP_KERNEL);
if (!dd->rcvhdrtail_dummy_kvaddr) {
dd_dev_err(dd, "cannot allocate dummy tail memory\n");
ret = -ENOMEM;
goto done;
}
/* dd->rcd can be NULL if early initialization failed */
for (i = 0; dd->rcd && i < dd->first_dyn_alloc_ctxt; ++i) {
/*
......@@ -898,8 +887,6 @@ int hfi1_init(struct hfi1_devdata *dd, int reinit)
if (!rcd)
continue;
rcd->do_interrupt = &handle_receive_interrupt;
lastfail = hfi1_create_rcvhdrq(dd, rcd);
if (!lastfail)
lastfail = hfi1_setup_eagerbufs(rcd);
......@@ -1120,7 +1107,7 @@ void hfi1_free_ctxtdata(struct hfi1_devdata *dd, struct hfi1_ctxtdata *rcd)
rcd->egrbufs.rcvtids = NULL;
for (e = 0; e < rcd->egrbufs.alloced; e++) {
if (rcd->egrbufs.buffers[e].dma)
if (rcd->egrbufs.buffers[e].addr)
dma_free_coherent(&dd->pcidev->dev,
rcd->egrbufs.buffers[e].len,
rcd->egrbufs.buffers[e].addr,
......@@ -1201,6 +1188,11 @@ void hfi1_free_devdata(struct hfi1_devdata *dd)
dd->tx_opstats = NULL;
kfree(dd->comp_vect);
dd->comp_vect = NULL;
if (dd->rcvhdrtail_dummy_kvaddr)
dma_free_coherent(&dd->pcidev->dev, sizeof(u64),
(void *)dd->rcvhdrtail_dummy_kvaddr,
dd->rcvhdrtail_dummy_dma);
dd->rcvhdrtail_dummy_kvaddr = NULL;
sdma_clean(dd, dd->num_sdma);
rvt_dealloc_device(&dd->verbs_dev.rdi);
}
......@@ -1298,6 +1290,15 @@ static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev,
goto bail;
}
/* allocate dummy tail memory for all receive contexts */
dd->rcvhdrtail_dummy_kvaddr =
dma_alloc_coherent(&dd->pcidev->dev, sizeof(u64),
&dd->rcvhdrtail_dummy_dma, GFP_KERNEL);
if (!dd->rcvhdrtail_dummy_kvaddr) {
ret = -ENOMEM;
goto bail;
}
atomic_set(&dd->ipoib_rsm_usr_num, 0);
return dd;
......@@ -1505,13 +1506,6 @@ static void cleanup_device_data(struct hfi1_devdata *dd)
free_credit_return(dd);
if (dd->rcvhdrtail_dummy_kvaddr) {
dma_free_coherent(&dd->pcidev->dev, sizeof(u64),
(void *)dd->rcvhdrtail_dummy_kvaddr,
dd->rcvhdrtail_dummy_dma);
dd->rcvhdrtail_dummy_kvaddr = NULL;
}
/*
* Free any resources still in use (usually just kernel contexts)
* at unload; we do for ctxtcnt, because that's what we allocate.
......
......@@ -838,8 +838,8 @@ struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
if (current->nr_cpus_allowed != 1)
goto out;
cpu_id = smp_processor_id();
rcu_read_lock();
cpu_id = smp_processor_id();
rht_node = rhashtable_lookup(dd->sdma_rht, &cpu_id,
sdma_rht_params);
......
......@@ -33,6 +33,7 @@
#include <linux/acpi.h>
#include <linux/etherdevice.h>
#include <linux/interrupt.h>
#include <linux/iopoll.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <net/addrconf.h>
......@@ -1050,9 +1051,14 @@ static u32 hns_roce_v2_cmd_hw_resetting(struct hns_roce_dev *hr_dev,
unsigned long instance_stage,
unsigned long reset_stage)
{
#define HW_RESET_TIMEOUT_US 1000000
#define HW_RESET_SLEEP_US 1000
struct hns_roce_v2_priv *priv = hr_dev->priv;
struct hnae3_handle *handle = priv->handle;
const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
unsigned long val;
int ret;
/* When hardware reset is detected, we should stop sending mailbox&cmq&
* doorbell to hardware. If now in .init_instance() function, we should
......@@ -1064,7 +1070,11 @@ static u32 hns_roce_v2_cmd_hw_resetting(struct hns_roce_dev *hr_dev,
* again.
*/
hr_dev->dis_db = true;
if (!ops->get_hw_reset_stat(handle))
ret = read_poll_timeout(ops->ae_dev_reset_cnt, val,
val > hr_dev->reset_cnt, HW_RESET_SLEEP_US,
HW_RESET_TIMEOUT_US, false, handle);
if (!ret)
hr_dev->is_reset = true;
if (!hr_dev->is_reset || reset_stage == HNS_ROCE_STATE_RST_INIT ||
......@@ -6387,10 +6397,8 @@ static int hns_roce_hw_v2_reset_notify_down(struct hnae3_handle *handle)
if (!hr_dev)
return 0;
hr_dev->is_reset = true;
hr_dev->active = false;
hr_dev->dis_db = true;
hr_dev->state = HNS_ROCE_DEVICE_STATE_RST_DOWN;
return 0;
......
......@@ -60,6 +60,8 @@ static void irdma_iwarp_ce_handler(struct irdma_sc_cq *iwcq)
{
struct irdma_cq *cq = iwcq->back_cq;
if (!cq->user_mode)
cq->armed = false;
if (cq->ibcq.comp_handler)
cq->ibcq.comp_handler(&cq->ibcq, cq->ibcq.cq_context);
}
......@@ -146,6 +148,7 @@ static void irdma_set_flush_fields(struct irdma_sc_qp *qp,
qp->flush_code = FLUSH_PROT_ERR;
break;
case IRDMA_AE_AMP_BAD_QP:
case IRDMA_AE_WQE_UNEXPECTED_OPCODE:
qp->flush_code = FLUSH_LOC_QP_OP_ERR;
break;
case IRDMA_AE_AMP_BAD_STAG_KEY:
......@@ -156,7 +159,6 @@ static void irdma_set_flush_fields(struct irdma_sc_qp *qp,
case IRDMA_AE_PRIV_OPERATION_DENIED:
case IRDMA_AE_IB_INVALID_REQUEST:
case IRDMA_AE_IB_REMOTE_ACCESS_ERROR:
case IRDMA_AE_IB_REMOTE_OP_ERROR:
qp->flush_code = FLUSH_REM_ACCESS_ERR;
qp->event_type = IRDMA_QP_EVENT_ACCESS_ERR;
break;
......@@ -184,6 +186,9 @@ static void irdma_set_flush_fields(struct irdma_sc_qp *qp,
case IRDMA_AE_AMP_MWBIND_INVALID_BOUNDS:
qp->flush_code = FLUSH_MW_BIND_ERR;
break;
case IRDMA_AE_IB_REMOTE_OP_ERROR:
qp->flush_code = FLUSH_REM_OP_ERR;
break;
default:
qp->flush_code = FLUSH_FATAL_ERR;
break;
......
......@@ -542,6 +542,7 @@ int irdma_ah_cqp_op(struct irdma_pci_f *rf, struct irdma_sc_ah *sc_ah, u8 cmd,
void (*callback_fcn)(struct irdma_cqp_request *cqp_request),
void *cb_param);
void irdma_gsi_ud_qp_ah_cb(struct irdma_cqp_request *cqp_request);
bool irdma_cq_empty(struct irdma_cq *iwcq);
int irdma_inetaddr_event(struct notifier_block *notifier, unsigned long event,
void *ptr);
int irdma_inet6addr_event(struct notifier_block *notifier, unsigned long event,
......
......@@ -25,8 +25,7 @@ void irdma_destroy_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc)
list_del(&chunk->list);
if (chunk->type == PBLE_SD_PAGED)
irdma_pble_free_paged_mem(chunk);
if (chunk->bitmapbuf)
kfree(chunk->bitmapmem.va);
bitmap_free(chunk->bitmapbuf);
kfree(chunk->chunkmem.va);
}
}
......@@ -283,7 +282,6 @@ add_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc)
"PBLE: next_fpm_addr = %llx chunk_size[%llu] = 0x%llx\n",
pble_rsrc->next_fpm_addr, chunk->size, chunk->size);
pble_rsrc->unallocated_pble -= (u32)(chunk->size >> 3);
list_add(&chunk->list, &pble_rsrc->pinfo.clist);
sd_reg_val = (sd_entry_type == IRDMA_SD_TYPE_PAGED) ?
sd_entry->u.pd_table.pd_page_addr.pa :
sd_entry->u.bp.addr.pa;
......@@ -295,12 +293,12 @@ add_pble_prm(struct irdma_hmc_pble_rsrc *pble_rsrc)
goto error;
}
list_add(&chunk->list, &pble_rsrc->pinfo.clist);
sd_entry->valid = true;
return 0;
error:
if (chunk->bitmapbuf)
kfree(chunk->bitmapmem.va);
bitmap_free(chunk->bitmapbuf);
kfree(chunk->chunkmem.va);
return ret_code;
......
......@@ -78,7 +78,6 @@ struct irdma_chunk {
u32 pg_cnt;
enum irdma_alloc_type type;
struct irdma_sc_dev *dev;
struct irdma_virt_mem bitmapmem;
struct irdma_virt_mem chunkmem;
};
......
......@@ -2239,15 +2239,10 @@ enum irdma_status_code irdma_prm_add_pble_mem(struct irdma_pble_prm *pprm,
sizeofbitmap = (u64)pchunk->size >> pprm->pble_shift;
pchunk->bitmapmem.size = sizeofbitmap >> 3;
pchunk->bitmapmem.va = kzalloc(pchunk->bitmapmem.size, GFP_KERNEL);
if (!pchunk->bitmapmem.va)
pchunk->bitmapbuf = bitmap_zalloc(sizeofbitmap, GFP_KERNEL);
if (!pchunk->bitmapbuf)
return IRDMA_ERR_NO_MEMORY;
pchunk->bitmapbuf = pchunk->bitmapmem.va;
bitmap_zero(pchunk->bitmapbuf, sizeofbitmap);
pchunk->sizeofbitmap = sizeofbitmap;
/* each pble is 8 bytes hence shift by 3 */
pprm->total_pble_alloc += pchunk->size >> 3;
......@@ -2491,3 +2486,18 @@ void irdma_ib_qp_event(struct irdma_qp *iwqp, enum irdma_qp_event_type event)
ibevent.element.qp = &iwqp->ibqp;
iwqp->ibqp.event_handler(&ibevent, iwqp->ibqp.qp_context);
}
bool irdma_cq_empty(struct irdma_cq *iwcq)
{
struct irdma_cq_uk *ukcq;
u64 qword3;
__le64 *cqe;
u8 polarity;
ukcq = &iwcq->sc_cq.cq_uk;
cqe = IRDMA_GET_CURRENT_CQ_ELEM(ukcq);
get_64bit_val(cqe, 24, &qword3);
polarity = (u8)FIELD_GET(IRDMA_CQ_VALID, qword3);
return polarity != ukcq->polarity;
}
......@@ -3584,18 +3584,31 @@ static int irdma_req_notify_cq(struct ib_cq *ibcq,
struct irdma_cq *iwcq;
struct irdma_cq_uk *ukcq;
unsigned long flags;
enum irdma_cmpl_notify cq_notify = IRDMA_CQ_COMPL_EVENT;
enum irdma_cmpl_notify cq_notify;
bool promo_event = false;
int ret = 0;
cq_notify = notify_flags == IB_CQ_SOLICITED ?
IRDMA_CQ_COMPL_SOLICITED : IRDMA_CQ_COMPL_EVENT;
iwcq = to_iwcq(ibcq);
ukcq = &iwcq->sc_cq.cq_uk;
if (notify_flags == IB_CQ_SOLICITED)
cq_notify = IRDMA_CQ_COMPL_SOLICITED;
spin_lock_irqsave(&iwcq->lock, flags);
irdma_uk_cq_request_notification(ukcq, cq_notify);
/* Only promote to arm the CQ for any event if the last arm event was solicited. */
if (iwcq->last_notify == IRDMA_CQ_COMPL_SOLICITED && notify_flags != IB_CQ_SOLICITED)
promo_event = true;
if (!iwcq->armed || promo_event) {
iwcq->armed = true;
iwcq->last_notify = cq_notify;
irdma_uk_cq_request_notification(ukcq, cq_notify);
}
if ((notify_flags & IB_CQ_REPORT_MISSED_EVENTS) && !irdma_cq_empty(iwcq))
ret = 1;
spin_unlock_irqrestore(&iwcq->lock, flags);
return 0;
return ret;
}
static int irdma_roce_port_immutable(struct ib_device *ibdev, u32 port_num,
......
......@@ -110,6 +110,8 @@ struct irdma_cq {
u16 cq_size;
u16 cq_num;
bool user_mode;
bool armed;
enum irdma_cmpl_notify last_notify;
u32 polled_cmpls;
u32 cq_mem_size;
struct irdma_dma_mem kmem;
......
......@@ -664,7 +664,6 @@ struct mlx5_ib_mr {
/* User MR data */
struct mlx5_cache_ent *cache_ent;
struct ib_umem *umem;
/* This is zero'd when the MR is allocated */
union {
......@@ -676,7 +675,7 @@ struct mlx5_ib_mr {
struct list_head list;
};
/* Used only by kernel MRs (umem == NULL) */
/* Used only by kernel MRs */
struct {
void *descs;
void *descs_alloc;
......@@ -697,8 +696,9 @@ struct mlx5_ib_mr {
int data_length;
};
/* Used only by User MRs (umem != NULL) */
/* Used only by User MRs */
struct {
struct ib_umem *umem;
unsigned int page_shift;
/* Current access_flags */
int access_flags;
......
......@@ -1904,19 +1904,18 @@ mlx5_alloc_priv_descs(struct ib_device *device,
return ret;
}
static void
mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
static void mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
{
if (!mr->umem && mr->descs) {
struct ib_device *device = mr->ibmr.device;
int size = mr->max_descs * mr->desc_size;
struct mlx5_ib_dev *dev = to_mdev(device);
struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
int size = mr->max_descs * mr->desc_size;
dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size,
DMA_TO_DEVICE);
kfree(mr->descs_alloc);
mr->descs = NULL;
}
if (!mr->descs)
return;
dma_unmap_single(&dev->mdev->pdev->dev, mr->desc_map, size,
DMA_TO_DEVICE);
kfree(mr->descs_alloc);
mr->descs = NULL;
}
int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
......@@ -1992,7 +1991,8 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
if (mr->cache_ent) {
mlx5_mr_cache_free(dev, mr);
} else {
mlx5_free_priv_descs(mr);
if (!udata)
mlx5_free_priv_descs(mr);
kfree(mr);
}
return 0;
......@@ -2079,7 +2079,6 @@ static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd,
if (err)
goto err_free_in;
mr->umem = NULL;
kfree(in);
return mr;
......@@ -2206,7 +2205,6 @@ static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd,
}
mr->ibmr.device = pd->device;
mr->umem = NULL;
switch (mr_type) {
case IB_MR_TYPE_MEM_REG:
......
......@@ -359,6 +359,7 @@ int rxe_qp_from_init(struct rxe_dev *rxe, struct rxe_qp *qp, struct rxe_pd *pd,
err2:
rxe_queue_cleanup(qp->sq.queue);
qp->sq.queue = NULL;
err1:
qp->pd = NULL;
qp->rcq = NULL;
......
......@@ -19,7 +19,7 @@ void rtrs_clt_update_wc_stats(struct rtrs_clt_con *con)
int cpu;
cpu = raw_smp_processor_id();
s = this_cpu_ptr(stats->pcpu_stats);
s = get_cpu_ptr(stats->pcpu_stats);
if (con->cpu != cpu) {
s->cpu_migr.to++;
......@@ -27,14 +27,16 @@ void rtrs_clt_update_wc_stats(struct rtrs_clt_con *con)
s = per_cpu_ptr(stats->pcpu_stats, con->cpu);
atomic_inc(&s->cpu_migr.from);
}
put_cpu_ptr(stats->pcpu_stats);
}
void rtrs_clt_inc_failover_cnt(struct rtrs_clt_stats *stats)
{
struct rtrs_clt_stats_pcpu *s;
s = this_cpu_ptr(stats->pcpu_stats);
s = get_cpu_ptr(stats->pcpu_stats);
s->rdma.failover_cnt++;
put_cpu_ptr(stats->pcpu_stats);
}
int rtrs_clt_stats_migration_from_cnt_to_str(struct rtrs_clt_stats *stats, char *buf)
......@@ -169,9 +171,10 @@ static inline void rtrs_clt_update_rdma_stats(struct rtrs_clt_stats *stats,
{
struct rtrs_clt_stats_pcpu *s;
s = this_cpu_ptr(stats->pcpu_stats);
s = get_cpu_ptr(stats->pcpu_stats);
s->rdma.dir[d].cnt++;
s->rdma.dir[d].size_total += size;
put_cpu_ptr(stats->pcpu_stats);
}
void rtrs_clt_update_all_stats(struct rtrs_clt_io_req *req, int dir)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册