Commit 6ff64d25, authored by Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma

Pull rdma fixes from Jason Gunthorpe:
 "Nothing profound here, just a last set of long standing bug fixes:

   - Incorrect error unwind in qib and pvrdma

   - User-triggerable NULL pointer crash in mlx5 with ODP prefetch

   - syzkaller RCU race in uverbs

   - Rare double free crash in ipoib"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  IB/ipoib: Fix double free of skb in case of multicast traffic in CM mode
  RDMA/core: Fix double destruction of uobject
  RDMA/pvrdma: Fix missing pci disable in pvrdma_pci_probe()
  RDMA/mlx5: Fix NULL pointer dereference in destroy_prefetch_work
  IB/qib: Call kobject_put() when kobject_init_and_add() fails
...@@ -153,9 +153,9 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj, ...@@ -153,9 +153,9 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj,
uobj->context = NULL; uobj->context = NULL;
/* /*
* For DESTROY the usecnt is held write locked, the caller is expected * For DESTROY the usecnt is not changed, the caller is expected to
* to put it unlock and put the object when done with it. Only DESTROY * manage it via uobj_put_destroy(). Only DESTROY can remove the IDR
* can remove the IDR handle. * handle.
*/ */
if (reason != RDMA_REMOVE_DESTROY) if (reason != RDMA_REMOVE_DESTROY)
atomic_set(&uobj->usecnt, 0); atomic_set(&uobj->usecnt, 0);
...@@ -187,7 +187,7 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj, ...@@ -187,7 +187,7 @@ static int uverbs_destroy_uobject(struct ib_uobject *uobj,
/* /*
* This calls uverbs_destroy_uobject() using the RDMA_REMOVE_DESTROY * This calls uverbs_destroy_uobject() using the RDMA_REMOVE_DESTROY
* sequence. It should only be used from command callbacks. On success the * sequence. It should only be used from command callbacks. On success the
* caller must pair this with rdma_lookup_put_uobject(LOOKUP_WRITE). This * caller must pair this with uobj_put_destroy(). This
* version requires the caller to have already obtained an * version requires the caller to have already obtained an
* LOOKUP_DESTROY uobject kref. * LOOKUP_DESTROY uobject kref.
*/ */
...@@ -198,6 +198,13 @@ int uobj_destroy(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs) ...@@ -198,6 +198,13 @@ int uobj_destroy(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs)
down_read(&ufile->hw_destroy_rwsem); down_read(&ufile->hw_destroy_rwsem);
/*
* Once the uobject is destroyed by RDMA_REMOVE_DESTROY then it is left
* write locked as the callers put it back with UVERBS_LOOKUP_DESTROY.
* This is because any other concurrent thread can still see the object
* in the xarray due to RCU. Leaving it locked ensures nothing else will
* touch it.
*/
ret = uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE); ret = uverbs_try_lock_object(uobj, UVERBS_LOOKUP_WRITE);
if (ret) if (ret)
goto out_unlock; goto out_unlock;
...@@ -216,7 +223,7 @@ int uobj_destroy(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs) ...@@ -216,7 +223,7 @@ int uobj_destroy(struct ib_uobject *uobj, struct uverbs_attr_bundle *attrs)
/* /*
* uobj_get_destroy destroys the HW object and returns a handle to the uobj * uobj_get_destroy destroys the HW object and returns a handle to the uobj
* with a NULL object pointer. The caller must pair this with * with a NULL object pointer. The caller must pair this with
* uverbs_put_destroy. * uobj_put_destroy().
*/ */
struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj, struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
u32 id, struct uverbs_attr_bundle *attrs) u32 id, struct uverbs_attr_bundle *attrs)
...@@ -250,8 +257,7 @@ int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id, ...@@ -250,8 +257,7 @@ int __uobj_perform_destroy(const struct uverbs_api_object *obj, u32 id,
uobj = __uobj_get_destroy(obj, id, attrs); uobj = __uobj_get_destroy(obj, id, attrs);
if (IS_ERR(uobj)) if (IS_ERR(uobj))
return PTR_ERR(uobj); return PTR_ERR(uobj);
uobj_put_destroy(uobj);
rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE);
return 0; return 0;
} }
......
...@@ -1439,6 +1439,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, ...@@ -1439,6 +1439,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
if (is_odp_mr(mr)) { if (is_odp_mr(mr)) {
to_ib_umem_odp(mr->umem)->private = mr; to_ib_umem_odp(mr->umem)->private = mr;
init_waitqueue_head(&mr->q_deferred_work);
atomic_set(&mr->num_deferred_work, 0); atomic_set(&mr->num_deferred_work, 0);
err = xa_err(xa_store(&dev->odp_mkeys, err = xa_err(xa_store(&dev->odp_mkeys,
mlx5_base_mkey(mr->mmkey.key), &mr->mmkey, mlx5_base_mkey(mr->mmkey.key), &mr->mmkey,
......
...@@ -760,7 +760,7 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num, ...@@ -760,7 +760,7 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
qib_dev_err(dd, qib_dev_err(dd,
"Skipping linkcontrol sysfs info, (err %d) port %u\n", "Skipping linkcontrol sysfs info, (err %d) port %u\n",
ret, port_num); ret, port_num);
goto bail; goto bail_link;
} }
kobject_uevent(&ppd->pport_kobj, KOBJ_ADD); kobject_uevent(&ppd->pport_kobj, KOBJ_ADD);
...@@ -770,7 +770,7 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num, ...@@ -770,7 +770,7 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
qib_dev_err(dd, qib_dev_err(dd,
"Skipping sl2vl sysfs info, (err %d) port %u\n", "Skipping sl2vl sysfs info, (err %d) port %u\n",
ret, port_num); ret, port_num);
goto bail_link; goto bail_sl;
} }
kobject_uevent(&ppd->sl2vl_kobj, KOBJ_ADD); kobject_uevent(&ppd->sl2vl_kobj, KOBJ_ADD);
...@@ -780,7 +780,7 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num, ...@@ -780,7 +780,7 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
qib_dev_err(dd, qib_dev_err(dd,
"Skipping diag_counters sysfs info, (err %d) port %u\n", "Skipping diag_counters sysfs info, (err %d) port %u\n",
ret, port_num); ret, port_num);
goto bail_sl; goto bail_diagc;
} }
kobject_uevent(&ppd->diagc_kobj, KOBJ_ADD); kobject_uevent(&ppd->diagc_kobj, KOBJ_ADD);
...@@ -793,7 +793,7 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num, ...@@ -793,7 +793,7 @@ int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
qib_dev_err(dd, qib_dev_err(dd,
"Skipping Congestion Control sysfs info, (err %d) port %u\n", "Skipping Congestion Control sysfs info, (err %d) port %u\n",
ret, port_num); ret, port_num);
goto bail_diagc; goto bail_cc;
} }
kobject_uevent(&ppd->pport_cc_kobj, KOBJ_ADD); kobject_uevent(&ppd->pport_cc_kobj, KOBJ_ADD);
...@@ -854,6 +854,7 @@ void qib_verbs_unregister_sysfs(struct qib_devdata *dd) ...@@ -854,6 +854,7 @@ void qib_verbs_unregister_sysfs(struct qib_devdata *dd)
&cc_table_bin_attr); &cc_table_bin_attr);
kobject_put(&ppd->pport_cc_kobj); kobject_put(&ppd->pport_cc_kobj);
} }
kobject_put(&ppd->diagc_kobj);
kobject_put(&ppd->sl2vl_kobj); kobject_put(&ppd->sl2vl_kobj);
kobject_put(&ppd->pport_kobj); kobject_put(&ppd->pport_kobj);
} }
......
...@@ -829,7 +829,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev, ...@@ -829,7 +829,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
!(pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) { !(pci_resource_flags(pdev, 1) & IORESOURCE_MEM)) {
dev_err(&pdev->dev, "PCI BAR region not MMIO\n"); dev_err(&pdev->dev, "PCI BAR region not MMIO\n");
ret = -ENOMEM; ret = -ENOMEM;
goto err_free_device; goto err_disable_pdev;
} }
ret = pci_request_regions(pdev, DRV_NAME); ret = pci_request_regions(pdev, DRV_NAME);
......
...@@ -377,8 +377,12 @@ struct ipoib_dev_priv { ...@@ -377,8 +377,12 @@ struct ipoib_dev_priv {
struct ipoib_rx_buf *rx_ring; struct ipoib_rx_buf *rx_ring;
struct ipoib_tx_buf *tx_ring; struct ipoib_tx_buf *tx_ring;
/* cyclic ring variables for managing tx_ring, for UD only */
unsigned int tx_head; unsigned int tx_head;
unsigned int tx_tail; unsigned int tx_tail;
/* cyclic ring variables for counting overall outstanding send WRs */
unsigned int global_tx_head;
unsigned int global_tx_tail;
struct ib_sge tx_sge[MAX_SKB_FRAGS + 1]; struct ib_sge tx_sge[MAX_SKB_FRAGS + 1];
struct ib_ud_wr tx_wr; struct ib_ud_wr tx_wr;
struct ib_wc send_wc[MAX_SEND_CQE]; struct ib_wc send_wc[MAX_SEND_CQE];
......
...@@ -756,7 +756,8 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ ...@@ -756,7 +756,8 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
return; return;
} }
if ((priv->tx_head - priv->tx_tail) == ipoib_sendq_size - 1) { if ((priv->global_tx_head - priv->global_tx_tail) ==
ipoib_sendq_size - 1) {
ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n", ipoib_dbg(priv, "TX ring 0x%x full, stopping kernel net queue\n",
tx->qp->qp_num); tx->qp->qp_num);
netif_stop_queue(dev); netif_stop_queue(dev);
...@@ -786,7 +787,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_ ...@@ -786,7 +787,7 @@ void ipoib_cm_send(struct net_device *dev, struct sk_buff *skb, struct ipoib_cm_
} else { } else {
netif_trans_update(dev); netif_trans_update(dev);
++tx->tx_head; ++tx->tx_head;
++priv->tx_head; ++priv->global_tx_head;
} }
} }
...@@ -820,10 +821,11 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) ...@@ -820,10 +821,11 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
netif_tx_lock(dev); netif_tx_lock(dev);
++tx->tx_tail; ++tx->tx_tail;
++priv->tx_tail; ++priv->global_tx_tail;
if (unlikely(netif_queue_stopped(dev) && if (unlikely(netif_queue_stopped(dev) &&
(priv->tx_head - priv->tx_tail) <= ipoib_sendq_size >> 1 && ((priv->global_tx_head - priv->global_tx_tail) <=
ipoib_sendq_size >> 1) &&
test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))) test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)))
netif_wake_queue(dev); netif_wake_queue(dev);
...@@ -1232,8 +1234,9 @@ static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p) ...@@ -1232,8 +1234,9 @@ static void ipoib_cm_tx_destroy(struct ipoib_cm_tx *p)
dev_kfree_skb_any(tx_req->skb); dev_kfree_skb_any(tx_req->skb);
netif_tx_lock_bh(p->dev); netif_tx_lock_bh(p->dev);
++p->tx_tail; ++p->tx_tail;
++priv->tx_tail; ++priv->global_tx_tail;
if (unlikely(priv->tx_head - priv->tx_tail == ipoib_sendq_size >> 1) && if (unlikely((priv->global_tx_head - priv->global_tx_tail) <=
ipoib_sendq_size >> 1) &&
netif_queue_stopped(p->dev) && netif_queue_stopped(p->dev) &&
test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)) test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
netif_wake_queue(p->dev); netif_wake_queue(p->dev);
......
...@@ -407,9 +407,11 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc) ...@@ -407,9 +407,11 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
dev_kfree_skb_any(tx_req->skb); dev_kfree_skb_any(tx_req->skb);
++priv->tx_tail; ++priv->tx_tail;
++priv->global_tx_tail;
if (unlikely(netif_queue_stopped(dev) && if (unlikely(netif_queue_stopped(dev) &&
((priv->tx_head - priv->tx_tail) <= ipoib_sendq_size >> 1) && ((priv->global_tx_head - priv->global_tx_tail) <=
ipoib_sendq_size >> 1) &&
test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))) test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags)))
netif_wake_queue(dev); netif_wake_queue(dev);
...@@ -634,7 +636,8 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb, ...@@ -634,7 +636,8 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb,
else else
priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM; priv->tx_wr.wr.send_flags &= ~IB_SEND_IP_CSUM;
/* increase the tx_head after send success, but use it for queue state */ /* increase the tx_head after send success, but use it for queue state */
if (priv->tx_head - priv->tx_tail == ipoib_sendq_size - 1) { if ((priv->global_tx_head - priv->global_tx_tail) ==
ipoib_sendq_size - 1) {
ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n"); ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
netif_stop_queue(dev); netif_stop_queue(dev);
} }
...@@ -662,6 +665,7 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb, ...@@ -662,6 +665,7 @@ int ipoib_send(struct net_device *dev, struct sk_buff *skb,
rc = priv->tx_head; rc = priv->tx_head;
++priv->tx_head; ++priv->tx_head;
++priv->global_tx_head;
} }
return rc; return rc;
} }
...@@ -807,6 +811,7 @@ int ipoib_ib_dev_stop_default(struct net_device *dev) ...@@ -807,6 +811,7 @@ int ipoib_ib_dev_stop_default(struct net_device *dev)
ipoib_dma_unmap_tx(priv, tx_req); ipoib_dma_unmap_tx(priv, tx_req);
dev_kfree_skb_any(tx_req->skb); dev_kfree_skb_any(tx_req->skb);
++priv->tx_tail; ++priv->tx_tail;
++priv->global_tx_tail;
} }
for (i = 0; i < ipoib_recvq_size; ++i) { for (i = 0; i < ipoib_recvq_size; ++i) {
......
...@@ -1184,9 +1184,11 @@ static void ipoib_timeout(struct net_device *dev, unsigned int txqueue) ...@@ -1184,9 +1184,11 @@ static void ipoib_timeout(struct net_device *dev, unsigned int txqueue)
ipoib_warn(priv, "transmit timeout: latency %d msecs\n", ipoib_warn(priv, "transmit timeout: latency %d msecs\n",
jiffies_to_msecs(jiffies - dev_trans_start(dev))); jiffies_to_msecs(jiffies - dev_trans_start(dev)));
ipoib_warn(priv, "queue stopped %d, tx_head %u, tx_tail %u\n", ipoib_warn(priv,
netif_queue_stopped(dev), "queue stopped %d, tx_head %u, tx_tail %u, global_tx_head %u, global_tx_tail %u\n",
priv->tx_head, priv->tx_tail); netif_queue_stopped(dev), priv->tx_head, priv->tx_tail,
priv->global_tx_head, priv->global_tx_tail);
/* XXX reset QP, etc. */ /* XXX reset QP, etc. */
} }
...@@ -1701,7 +1703,7 @@ static int ipoib_dev_init_default(struct net_device *dev) ...@@ -1701,7 +1703,7 @@ static int ipoib_dev_init_default(struct net_device *dev)
goto out_rx_ring_cleanup; goto out_rx_ring_cleanup;
} }
/* priv->tx_head, tx_tail & tx_outstanding are already 0 */ /* priv->tx_head, tx_tail and global_tx_tail/head are already 0 */
if (ipoib_transport_dev_init(dev, priv->ca)) { if (ipoib_transport_dev_init(dev, priv->ca)) {
pr_warn("%s: ipoib_transport_dev_init failed\n", pr_warn("%s: ipoib_transport_dev_init failed\n",
......
...@@ -88,7 +88,7 @@ struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj, ...@@ -88,7 +88,7 @@ struct ib_uobject *__uobj_get_destroy(const struct uverbs_api_object *obj,
static inline void uobj_put_destroy(struct ib_uobject *uobj) static inline void uobj_put_destroy(struct ib_uobject *uobj)
{ {
rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_WRITE); rdma_lookup_put_uobject(uobj, UVERBS_LOOKUP_DESTROY);
} }
static inline void uobj_put_read(struct ib_uobject *uobj) static inline void uobj_put_read(struct ib_uobject *uobj)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册