提交 452b6361 编写于 作者: L Linus Torvalds

Merge tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband

Pull infiniband/rdma fixes from Roland Dreier:
 "Last late set of InfiniBand/RDMA fixes for 3.17:

   - fixes for the new memory region re-registration support
   - iSER initiator error path fixes
   - grab bag of small fixes for the qib and ocrdma hardware drivers
   - larger set of fixes for mlx4, especially in RoCE mode"

* tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (26 commits)
  IB/mlx4: Fix VF mac handling in RoCE
  IB/mlx4: Do not allow APM under RoCE
  IB/mlx4: Don't update QP1 in native mode
  IB/mlx4: Avoid accessing netdevice when building RoCE qp1 header
  mlx4: Fix mlx4 reg/unreg mac to work properly with 0-mac addresses
  IB/core: When marshaling uverbs path, clear unused fields
  IB/mlx4: Avoid executing gid task when device is being removed
  IB/mlx4: Fix lockdep splat for the iboe lock
  IB/mlx4: Get upper dev addresses as RoCE GIDs when port comes up
  IB/mlx4: Reorder steps in RoCE GID table initialization
  IB/mlx4: Don't duplicate the default RoCE GID
  IB/mlx4: Avoid null pointer dereference in mlx4_ib_scan_netdevs()
  IB/iser: Bump version to 1.4.1
  IB/iser: Allow bind only when connection state is UP
  IB/iser: Fix RX/TX CQ resource leak on error flow
  RDMA/ocrdma: Use right macro in query AH
  RDMA/ocrdma: Resolve L2 address when creating user AH
  mlx4: Correct error flows in rereg_mr
  IB/qib: Correct reference counting in debugfs qp_stats
  IPoIB: Remove unnecessary port query
  ...
......@@ -105,6 +105,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
umem->length = size;
umem->offset = addr & ~PAGE_MASK;
umem->page_size = PAGE_SIZE;
umem->pid = get_task_pid(current, PIDTYPE_PID);
/*
* We ask for writable memory if any access flags other than
* "remote read" are set. "Local write" and "remote write"
......@@ -198,6 +199,7 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
if (ret < 0) {
if (need_release)
__ib_umem_release(context->device, umem, 0);
put_pid(umem->pid);
kfree(umem);
} else
current->mm->pinned_vm = locked;
......@@ -230,15 +232,19 @@ void ib_umem_release(struct ib_umem *umem)
{
struct ib_ucontext *context = umem->context;
struct mm_struct *mm;
struct task_struct *task;
unsigned long diff;
__ib_umem_release(umem->context->device, umem, 1);
mm = get_task_mm(current);
if (!mm) {
kfree(umem);
return;
}
task = get_pid_task(umem->pid, PIDTYPE_PID);
put_pid(umem->pid);
if (!task)
goto out;
mm = get_task_mm(task);
put_task_struct(task);
if (!mm)
goto out;
diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
......@@ -262,9 +268,10 @@ void ib_umem_release(struct ib_umem *umem)
} else
down_write(&mm->mmap_sem);
current->mm->pinned_vm -= diff;
mm->pinned_vm -= diff;
up_write(&mm->mmap_sem);
mmput(mm);
out:
kfree(umem);
}
EXPORT_SYMBOL(ib_umem_release);
......
......@@ -140,5 +140,9 @@ void ib_copy_path_rec_from_user(struct ib_sa_path_rec *dst,
dst->packet_life_time = src->packet_life_time;
dst->preference = src->preference;
dst->packet_life_time_selector = src->packet_life_time_selector;
memset(dst->smac, 0, sizeof(dst->smac));
memset(dst->dmac, 0, sizeof(dst->dmac));
dst->vlan_id = 0xffff;
}
EXPORT_SYMBOL(ib_copy_path_rec_from_user);
......@@ -54,7 +54,7 @@ static void __ipath_release_user_pages(struct page **p, size_t num_pages,
/* call with current->mm->mmap_sem held */
static int __ipath_get_user_pages(unsigned long start_page, size_t num_pages,
struct page **p, struct vm_area_struct **vma)
struct page **p)
{
unsigned long lock_limit;
size_t got;
......@@ -74,7 +74,7 @@ static int __ipath_get_user_pages(unsigned long start_page, size_t num_pages,
ret = get_user_pages(current, current->mm,
start_page + got * PAGE_SIZE,
num_pages - got, 1, 1,
p + got, vma);
p + got, NULL);
if (ret < 0)
goto bail_release;
}
......@@ -165,7 +165,7 @@ int ipath_get_user_pages(unsigned long start_page, size_t num_pages,
down_write(&current->mm->mmap_sem);
ret = __ipath_get_user_pages(start_page, num_pages, p, NULL);
ret = __ipath_get_user_pages(start_page, num_pages, p);
up_write(&current->mm->mmap_sem);
......
......@@ -59,6 +59,7 @@
#define MLX4_IB_FLOW_MAX_PRIO 0xFFF
#define MLX4_IB_FLOW_QPN_MASK 0xFFFFFF
#define MLX4_IB_CARD_REV_A0 0xA0
MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
......@@ -119,6 +120,17 @@ static int check_flow_steering_support(struct mlx4_dev *dev)
return dmfs;
}
static int num_ib_ports(struct mlx4_dev *dev)
{
int ib_ports = 0;
int i;
mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB)
ib_ports++;
return ib_ports;
}
static int mlx4_ib_query_device(struct ib_device *ibdev,
struct ib_device_attr *props)
{
......@@ -126,6 +138,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
struct ib_smp *in_mad = NULL;
struct ib_smp *out_mad = NULL;
int err = -ENOMEM;
int have_ib_ports;
in_mad = kzalloc(sizeof *in_mad, GFP_KERNEL);
out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
......@@ -142,6 +155,8 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
memset(props, 0, sizeof *props);
have_ib_ports = num_ib_ports(dev->dev);
props->fw_ver = dev->dev->caps.fw_ver;
props->device_cap_flags = IB_DEVICE_CHANGE_PHY_PORT |
IB_DEVICE_PORT_ACTIVE_EVENT |
......@@ -152,13 +167,15 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BAD_QKEY_CNTR)
props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_APM)
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_APM && have_ib_ports)
props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_UD_AV_PORT)
props->device_cap_flags |= IB_DEVICE_UD_AV_PORT_ENFORCE;
if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IPOIB_CSUM)
props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
if (dev->dev->caps.max_gso_sz && dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH)
if (dev->dev->caps.max_gso_sz &&
(dev->dev->rev_id != MLX4_IB_CARD_REV_A0) &&
(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_BLH))
props->device_cap_flags |= IB_DEVICE_UD_TSO;
if (dev->dev->caps.bmme_flags & MLX4_BMME_FLAG_RESERVED_LKEY)
props->device_cap_flags |= IB_DEVICE_LOCAL_DMA_LKEY;
......@@ -357,7 +374,7 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
props->state = IB_PORT_DOWN;
props->phys_state = state_to_phys_state(props->state);
props->active_mtu = IB_MTU_256;
spin_lock(&iboe->lock);
spin_lock_bh(&iboe->lock);
ndev = iboe->netdevs[port - 1];
if (!ndev)
goto out_unlock;
......@@ -369,7 +386,7 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
IB_PORT_ACTIVE : IB_PORT_DOWN;
props->phys_state = state_to_phys_state(props->state);
out_unlock:
spin_unlock(&iboe->lock);
spin_unlock_bh(&iboe->lock);
out:
mlx4_free_cmd_mailbox(mdev->dev, mailbox);
return err;
......@@ -811,11 +828,11 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
if (!mqp->port)
return 0;
spin_lock(&mdev->iboe.lock);
spin_lock_bh(&mdev->iboe.lock);
ndev = mdev->iboe.netdevs[mqp->port - 1];
if (ndev)
dev_hold(ndev);
spin_unlock(&mdev->iboe.lock);
spin_unlock_bh(&mdev->iboe.lock);
if (ndev) {
ret = 1;
......@@ -1292,11 +1309,11 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
mutex_lock(&mqp->mutex);
ge = find_gid_entry(mqp, gid->raw);
if (ge) {
spin_lock(&mdev->iboe.lock);
spin_lock_bh(&mdev->iboe.lock);
ndev = ge->added ? mdev->iboe.netdevs[ge->port - 1] : NULL;
if (ndev)
dev_hold(ndev);
spin_unlock(&mdev->iboe.lock);
spin_unlock_bh(&mdev->iboe.lock);
if (ndev)
dev_put(ndev);
list_del(&ge->list);
......@@ -1417,6 +1434,9 @@ static void update_gids_task(struct work_struct *work)
int err;
struct mlx4_dev *dev = gw->dev->dev;
if (!gw->dev->ib_active)
return;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox)) {
pr_warn("update gid table failed %ld\n", PTR_ERR(mailbox));
......@@ -1447,6 +1467,9 @@ static void reset_gids_task(struct work_struct *work)
int err;
struct mlx4_dev *dev = gw->dev->dev;
if (!gw->dev->ib_active)
return;
mailbox = mlx4_alloc_cmd_mailbox(dev);
if (IS_ERR(mailbox)) {
pr_warn("reset gid table failed\n");
......@@ -1581,7 +1604,7 @@ static int mlx4_ib_addr_event(int event, struct net_device *event_netdev,
return 0;
iboe = &ibdev->iboe;
spin_lock(&iboe->lock);
spin_lock_bh(&iboe->lock);
for (port = 1; port <= ibdev->dev->caps.num_ports; ++port)
if ((netif_is_bond_master(real_dev) &&
......@@ -1591,7 +1614,7 @@ static int mlx4_ib_addr_event(int event, struct net_device *event_netdev,
update_gid_table(ibdev, port, gid,
event == NETDEV_DOWN, 0);
spin_unlock(&iboe->lock);
spin_unlock_bh(&iboe->lock);
return 0;
}
......@@ -1664,13 +1687,21 @@ static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev,
new_smac = mlx4_mac_to_u64(dev->dev_addr);
read_unlock(&dev_base_lock);
atomic64_set(&ibdev->iboe.mac[port - 1], new_smac);
/* no need for update QP1 and mac registration in non-SRIOV */
if (!mlx4_is_mfunc(ibdev->dev))
return;
mutex_lock(&ibdev->qp1_proxy_lock[port - 1]);
qp = ibdev->qp1_proxy[port - 1];
if (qp) {
int new_smac_index;
u64 old_smac = qp->pri.smac;
u64 old_smac;
struct mlx4_update_qp_params update_params;
mutex_lock(&qp->mutex);
old_smac = qp->pri.smac;
if (new_smac == old_smac)
goto unlock;
......@@ -1685,17 +1716,20 @@ static void mlx4_ib_update_qps(struct mlx4_ib_dev *ibdev,
release_mac = new_smac;
goto unlock;
}
/* if old port was zero, no mac was yet registered for this QP */
if (qp->pri.smac_port)
release_mac = old_smac;
qp->pri.smac = new_smac;
qp->pri.smac_port = port;
qp->pri.smac_index = new_smac_index;
release_mac = old_smac;
}
unlock:
mutex_unlock(&ibdev->qp1_proxy_lock[port - 1]);
if (release_mac != MLX4_IB_INVALID_MAC)
mlx4_unregister_mac(ibdev->dev, port, release_mac);
if (qp)
mutex_unlock(&qp->mutex);
mutex_unlock(&ibdev->qp1_proxy_lock[port - 1]);
}
static void mlx4_ib_get_dev_addr(struct net_device *dev,
......@@ -1706,6 +1740,7 @@ static void mlx4_ib_get_dev_addr(struct net_device *dev,
struct inet6_dev *in6_dev;
union ib_gid *pgid;
struct inet6_ifaddr *ifp;
union ib_gid default_gid;
#endif
union ib_gid gid;
......@@ -1726,12 +1761,15 @@ static void mlx4_ib_get_dev_addr(struct net_device *dev,
in_dev_put(in_dev);
}
#if IS_ENABLED(CONFIG_IPV6)
mlx4_make_default_gid(dev, &default_gid);
/* IPv6 gids */
in6_dev = in6_dev_get(dev);
if (in6_dev) {
read_lock_bh(&in6_dev->lock);
list_for_each_entry(ifp, &in6_dev->addr_list, if_list) {
pgid = (union ib_gid *)&ifp->addr;
if (!memcmp(pgid, &default_gid, sizeof(*pgid)))
continue;
update_gid_table(ibdev, port, pgid, 0, 0);
}
read_unlock_bh(&in6_dev->lock);
......@@ -1753,24 +1791,33 @@ static int mlx4_ib_init_gid_table(struct mlx4_ib_dev *ibdev)
struct net_device *dev;
struct mlx4_ib_iboe *iboe = &ibdev->iboe;
int i;
int err = 0;
for (i = 1; i <= ibdev->num_ports; ++i)
if (reset_gid_table(ibdev, i))
return -1;
for (i = 1; i <= ibdev->num_ports; ++i) {
if (rdma_port_get_link_layer(&ibdev->ib_dev, i) ==
IB_LINK_LAYER_ETHERNET) {
err = reset_gid_table(ibdev, i);
if (err)
goto out;
}
}
read_lock(&dev_base_lock);
spin_lock(&iboe->lock);
spin_lock_bh(&iboe->lock);
for_each_netdev(&init_net, dev) {
u8 port = mlx4_ib_get_dev_port(dev, ibdev);
if (port)
/* port will be non-zero only for ETH ports */
if (port) {
mlx4_ib_set_default_gid(ibdev, dev, port);
mlx4_ib_get_dev_addr(dev, ibdev, port);
}
}
spin_unlock(&iboe->lock);
spin_unlock_bh(&iboe->lock);
read_unlock(&dev_base_lock);
return 0;
out:
return err;
}
static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
......@@ -1784,7 +1831,7 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
iboe = &ibdev->iboe;
spin_lock(&iboe->lock);
spin_lock_bh(&iboe->lock);
mlx4_foreach_ib_transport_port(port, ibdev->dev) {
enum ib_port_state port_state = IB_PORT_NOP;
struct net_device *old_master = iboe->masters[port - 1];
......@@ -1816,35 +1863,47 @@ static void mlx4_ib_scan_netdevs(struct mlx4_ib_dev *ibdev,
port_state = (netif_running(curr_netdev) && netif_carrier_ok(curr_netdev)) ?
IB_PORT_ACTIVE : IB_PORT_DOWN;
mlx4_ib_set_default_gid(ibdev, curr_netdev, port);
} else {
reset_gid_table(ibdev, port);
}
/* if using bonding/team and a slave port is down, we don't the bond IP
* based gids in the table since flows that select port by gid may get
* the down port.
*/
if (curr_master && (port_state == IB_PORT_DOWN)) {
reset_gid_table(ibdev, port);
mlx4_ib_set_default_gid(ibdev, curr_netdev, port);
}
/* if bonding is used it is possible that we add it to masters
* only after IP address is assigned to the net bonding
* interface.
*/
if (curr_master && (old_master != curr_master)) {
reset_gid_table(ibdev, port);
mlx4_ib_set_default_gid(ibdev, curr_netdev, port);
mlx4_ib_get_dev_addr(curr_master, ibdev, port);
}
if (curr_master) {
/* if using bonding/team and a slave port is down, we
* don't want the bond IP based gids in the table since
* flows that select port by gid may get the down port.
*/
if (port_state == IB_PORT_DOWN) {
reset_gid_table(ibdev, port);
mlx4_ib_set_default_gid(ibdev,
curr_netdev,
port);
} else {
/* gids from the upper dev (bond/team)
* should appear in port's gid table
*/
mlx4_ib_get_dev_addr(curr_master,
ibdev, port);
}
}
/* if bonding is used it is possible that we add it to
* masters only after IP address is assigned to the
* net bonding interface.
*/
if (curr_master && (old_master != curr_master)) {
reset_gid_table(ibdev, port);
mlx4_ib_set_default_gid(ibdev,
curr_netdev, port);
mlx4_ib_get_dev_addr(curr_master, ibdev, port);
}
if (!curr_master && (old_master != curr_master)) {
if (!curr_master && (old_master != curr_master)) {
reset_gid_table(ibdev, port);
mlx4_ib_set_default_gid(ibdev,
curr_netdev, port);
mlx4_ib_get_dev_addr(curr_netdev, ibdev, port);
}
} else {
reset_gid_table(ibdev, port);
mlx4_ib_set_default_gid(ibdev, curr_netdev, port);
mlx4_ib_get_dev_addr(curr_netdev, ibdev, port);
}
}
spin_unlock(&iboe->lock);
spin_unlock_bh(&iboe->lock);
if (update_qps_port > 0)
mlx4_ib_update_qps(ibdev, dev, update_qps_port);
......@@ -2186,6 +2245,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
goto err_steer_free_bitmap;
}
for (j = 1; j <= ibdev->dev->caps.num_ports; j++)
atomic64_set(&iboe->mac[j - 1], ibdev->dev->caps.def_mac[j]);
if (ib_register_device(&ibdev->ib_dev, NULL))
goto err_steer_free_bitmap;
......@@ -2222,12 +2284,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
}
}
#endif
for (i = 1 ; i <= ibdev->num_ports ; ++i)
reset_gid_table(ibdev, i);
rtnl_lock();
mlx4_ib_scan_netdevs(ibdev, NULL, 0);
rtnl_unlock();
mlx4_ib_init_gid_table(ibdev);
if (mlx4_ib_init_gid_table(ibdev))
goto err_notif;
}
for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) {
......@@ -2375,6 +2433,9 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr)
struct mlx4_ib_dev *ibdev = ibdev_ptr;
int p;
ibdev->ib_active = false;
flush_workqueue(wq);
mlx4_ib_close_sriov(ibdev);
mlx4_ib_mad_cleanup(ibdev);
ib_unregister_device(&ibdev->ib_dev);
......
......@@ -451,6 +451,7 @@ struct mlx4_ib_iboe {
spinlock_t lock;
struct net_device *netdevs[MLX4_MAX_PORTS];
struct net_device *masters[MLX4_MAX_PORTS];
atomic64_t mac[MLX4_MAX_PORTS];
struct notifier_block nb;
struct notifier_block nb_inet;
struct notifier_block nb_inet6;
......
......@@ -234,14 +234,13 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
0);
if (IS_ERR(mmr->umem)) {
err = PTR_ERR(mmr->umem);
/* Prevent mlx4_ib_dereg_mr from free'ing invalid pointer */
mmr->umem = NULL;
goto release_mpt_entry;
}
n = ib_umem_page_count(mmr->umem);
shift = ilog2(mmr->umem->page_size);
mmr->mmr.iova = virt_addr;
mmr->mmr.size = length;
err = mlx4_mr_rereg_mem_write(dev->dev, &mmr->mmr,
virt_addr, length, n, shift,
*pmpt_entry);
......@@ -249,6 +248,8 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
ib_umem_release(mmr->umem);
goto release_mpt_entry;
}
mmr->mmr.iova = virt_addr;
mmr->mmr.size = length;
err = mlx4_ib_umem_write_mtt(dev, &mmr->mmr.mtt, mmr->umem);
if (err) {
......@@ -262,6 +263,8 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
* return a failure. But dereg_mr will free the resources.
*/
err = mlx4_mr_hw_write_mpt(dev->dev, &mmr->mmr, pmpt_entry);
if (!err && flags & IB_MR_REREG_ACCESS)
mmr->mmr.access = mr_access_flags;
release_mpt_entry:
mlx4_mr_hw_put_mpt(dev->dev, pmpt_entry);
......
......@@ -964,9 +964,10 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
MLX4_QP_STATE_RST, NULL, 0, 0, &qp->mqp))
pr_warn("modify QP %06x to RESET failed.\n",
qp->mqp.qpn);
if (qp->pri.smac) {
if (qp->pri.smac || (!qp->pri.smac && qp->pri.smac_port)) {
mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac);
qp->pri.smac = 0;
qp->pri.smac_port = 0;
}
if (qp->alt.smac) {
mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac);
......@@ -1325,7 +1326,8 @@ static int _mlx4_set_path(struct mlx4_ib_dev *dev, const struct ib_ah_attr *ah,
* If one was already assigned, but the new mac differs,
* unregister the old one and register the new one.
*/
if (!smac_info->smac || smac_info->smac != smac) {
if ((!smac_info->smac && !smac_info->smac_port) ||
smac_info->smac != smac) {
/* register candidate now, unreg if needed, after success */
smac_index = mlx4_register_mac(dev->dev, port, smac);
if (smac_index >= 0) {
......@@ -1390,21 +1392,13 @@ static void update_mcg_macs(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp)
static int handle_eth_ud_smac_index(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, u8 *smac,
struct mlx4_qp_context *context)
{
struct net_device *ndev;
u64 u64_mac;
int smac_index;
ndev = dev->iboe.netdevs[qp->port - 1];
if (ndev) {
smac = ndev->dev_addr;
u64_mac = mlx4_mac_to_u64(smac);
} else {
u64_mac = dev->dev->caps.def_mac[qp->port];
}
u64_mac = atomic64_read(&dev->iboe.mac[qp->port - 1]);
context->pri_path.sched_queue = MLX4_IB_DEFAULT_SCHED_QUEUE | ((qp->port - 1) << 6);
if (!qp->pri.smac) {
if (!qp->pri.smac && !qp->pri.smac_port) {
smac_index = mlx4_register_mac(dev->dev, qp->port, u64_mac);
if (smac_index >= 0) {
qp->pri.candidate_smac_index = smac_index;
......@@ -1432,6 +1426,12 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
int steer_qp = 0;
int err = -EINVAL;
/* APM is not supported under RoCE */
if (attr_mask & IB_QP_ALT_PATH &&
rdma_port_get_link_layer(&dev->ib_dev, qp->port) ==
IB_LINK_LAYER_ETHERNET)
return -ENOTSUPP;
context = kzalloc(sizeof *context, GFP_KERNEL);
if (!context)
return -ENOMEM;
......@@ -1786,9 +1786,10 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
if (qp->flags & MLX4_IB_QP_NETIF)
mlx4_ib_steer_qp_reg(dev, qp, 0);
}
if (qp->pri.smac) {
if (qp->pri.smac || (!qp->pri.smac && qp->pri.smac_port)) {
mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac);
qp->pri.smac = 0;
qp->pri.smac_port = 0;
}
if (qp->alt.smac) {
mlx4_unregister_mac(dev->dev, qp->alt.smac_port, qp->alt.smac);
......@@ -1812,11 +1813,12 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp,
if (err && steer_qp)
mlx4_ib_steer_qp_reg(dev, qp, 0);
kfree(context);
if (qp->pri.candidate_smac) {
if (qp->pri.candidate_smac ||
(!qp->pri.candidate_smac && qp->pri.candidate_smac_port)) {
if (err) {
mlx4_unregister_mac(dev->dev, qp->pri.candidate_smac_port, qp->pri.candidate_smac);
} else {
if (qp->pri.smac)
if (qp->pri.smac || (!qp->pri.smac && qp->pri.smac_port))
mlx4_unregister_mac(dev->dev, qp->pri.smac_port, qp->pri.smac);
qp->pri.smac = qp->pri.candidate_smac;
qp->pri.smac_index = qp->pri.candidate_smac_index;
......@@ -2089,6 +2091,16 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp,
return 0;
}
static void mlx4_u64_to_smac(u8 *dst_mac, u64 src_mac)
{
int i;
for (i = ETH_ALEN; i; i--) {
dst_mac[i - 1] = src_mac & 0xff;
src_mac >>= 8;
}
}
static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
void *wqe, unsigned *mlx_seg_len)
{
......@@ -2203,7 +2215,6 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
}
if (is_eth) {
u8 *smac;
struct in6_addr in6;
u16 pcp = (be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 29) << 13;
......@@ -2216,12 +2227,17 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr,
memcpy(&ctrl->imm, ah->av.eth.mac + 2, 4);
memcpy(&in6, sgid.raw, sizeof(in6));
if (!mlx4_is_mfunc(to_mdev(ib_dev)->dev))
smac = to_mdev(sqp->qp.ibqp.device)->
iboe.netdevs[sqp->qp.port - 1]->dev_addr;
else /* use the src mac of the tunnel */
smac = ah->av.eth.s_mac;
memcpy(sqp->ud_header.eth.smac_h, smac, 6);
if (!mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
u64 mac = atomic64_read(&to_mdev(ib_dev)->iboe.mac[sqp->qp.port - 1]);
u8 smac[ETH_ALEN];
mlx4_u64_to_smac(smac, mac);
memcpy(sqp->ud_header.eth.smac_h, smac, ETH_ALEN);
} else {
/* use the src mac of the tunnel */
memcpy(sqp->ud_header.eth.smac_h, ah->av.eth.s_mac, ETH_ALEN);
}
if (!memcmp(sqp->ud_header.eth.smac_h, sqp->ud_header.eth.dmac_h, 6))
mlx->flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK);
if (!is_vlan) {
......
......@@ -38,7 +38,7 @@
#define OCRDMA_VID_PCP_SHIFT 0xD
static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
struct ib_ah_attr *attr, int pdid)
struct ib_ah_attr *attr, union ib_gid *sgid, int pdid)
{
int status = 0;
u16 vlan_tag; bool vlan_enabled = false;
......@@ -49,8 +49,7 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
memset(&eth, 0, sizeof(eth));
memset(&grh, 0, sizeof(grh));
ah->sgid_index = attr->grh.sgid_index;
/* VLAN */
vlan_tag = attr->vlan_id;
if (!vlan_tag || (vlan_tag > 0xFFF))
vlan_tag = dev->pvid;
......@@ -65,15 +64,14 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
eth.eth_type = cpu_to_be16(OCRDMA_ROCE_ETH_TYPE);
eth_sz = sizeof(struct ocrdma_eth_basic);
}
/* MAC */
memcpy(&eth.smac[0], &dev->nic_info.mac_addr[0], ETH_ALEN);
memcpy(&eth.dmac[0], attr->dmac, ETH_ALEN);
status = ocrdma_resolve_dmac(dev, attr, &eth.dmac[0]);
if (status)
return status;
status = ocrdma_query_gid(&dev->ibdev, 1, attr->grh.sgid_index,
(union ib_gid *)&grh.sgid[0]);
if (status)
return status;
ah->sgid_index = attr->grh.sgid_index;
memcpy(&grh.sgid[0], sgid->raw, sizeof(union ib_gid));
memcpy(&grh.dgid[0], attr->grh.dgid.raw, sizeof(attr->grh.dgid.raw));
grh.tclass_flow = cpu_to_be32((6 << 28) |
(attr->grh.traffic_class << 24) |
......@@ -81,8 +79,7 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah,
/* 0x1b is next header value in GRH */
grh.pdid_hoplimit = cpu_to_be32((pdid << 16) |
(0x1b << 8) | attr->grh.hop_limit);
memcpy(&grh.dgid[0], attr->grh.dgid.raw, sizeof(attr->grh.dgid.raw));
/* Eth HDR */
memcpy(&ah->av->eth_hdr, &eth, eth_sz);
memcpy((u8 *)ah->av + eth_sz, &grh, sizeof(struct ocrdma_grh));
if (vlan_enabled)
......@@ -98,6 +95,8 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
struct ocrdma_ah *ah;
struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
union ib_gid sgid;
u8 zmac[ETH_ALEN];
if (!(attr->ah_flags & IB_AH_GRH))
return ERR_PTR(-EINVAL);
......@@ -111,7 +110,27 @@ struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr)
status = ocrdma_alloc_av(dev, ah);
if (status)
goto av_err;
status = set_av_attr(dev, ah, attr, pd->id);
status = ocrdma_query_gid(&dev->ibdev, 1, attr->grh.sgid_index, &sgid);
if (status) {
pr_err("%s(): Failed to query sgid, status = %d\n",
__func__, status);
goto av_conf_err;
}
memset(&zmac, 0, ETH_ALEN);
if (pd->uctx &&
memcmp(attr->dmac, &zmac, ETH_ALEN)) {
status = rdma_addr_find_dmac_by_grh(&sgid, &attr->grh.dgid,
attr->dmac, &attr->vlan_id);
if (status) {
pr_err("%s(): Failed to resolve dmac from gid."
"status = %d\n", __func__, status);
goto av_conf_err;
}
}
status = set_av_attr(dev, ah, attr, &sgid, pd->id);
if (status)
goto av_conf_err;
......@@ -145,7 +164,7 @@ int ocrdma_query_ah(struct ib_ah *ibah, struct ib_ah_attr *attr)
struct ocrdma_av *av = ah->av;
struct ocrdma_grh *grh;
attr->ah_flags |= IB_AH_GRH;
if (ah->av->valid & Bit(1)) {
if (ah->av->valid & OCRDMA_AV_VALID) {
grh = (struct ocrdma_grh *)((u8 *)ah->av +
sizeof(struct ocrdma_eth_vlan));
attr->sl = be16_to_cpu(av->eth_hdr.vlan_tag) >> 13;
......
......@@ -101,7 +101,7 @@ int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr)
attr->max_srq_sge = dev->attr.max_srq_sge;
attr->max_srq_wr = dev->attr.max_rqe;
attr->local_ca_ack_delay = dev->attr.local_ca_ack_delay;
attr->max_fast_reg_page_list_len = 0;
attr->max_fast_reg_page_list_len = dev->attr.max_pages_per_frmr;
attr->max_pkeys = 1;
return 0;
}
......@@ -2846,11 +2846,9 @@ int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags)
if (cq->first_arm) {
ocrdma_ring_cq_db(dev, cq_id, arm_needed, sol_needed, 0);
cq->first_arm = false;
goto skip_defer;
}
cq->deferred_arm = true;
skip_defer:
cq->deferred_arm = true;
cq->deferred_sol = sol_needed;
spin_unlock_irqrestore(&cq->cq_lock, flags);
......
......@@ -193,6 +193,7 @@ static void *_qp_stats_seq_start(struct seq_file *s, loff_t *pos)
struct qib_qp_iter *iter;
loff_t n = *pos;
rcu_read_lock();
iter = qib_qp_iter_init(s->private);
if (!iter)
return NULL;
......@@ -224,7 +225,7 @@ static void *_qp_stats_seq_next(struct seq_file *s, void *iter_ptr,
static void _qp_stats_seq_stop(struct seq_file *s, void *iter_ptr)
{
/* nothing for now */
rcu_read_unlock();
}
static int _qp_stats_seq_show(struct seq_file *s, void *iter_ptr)
......
......@@ -1325,7 +1325,6 @@ int qib_qp_iter_next(struct qib_qp_iter *iter)
struct qib_qp *pqp = iter->qp;
struct qib_qp *qp;
rcu_read_lock();
for (; n < dev->qp_table_size; n++) {
if (pqp)
qp = rcu_dereference(pqp->next);
......@@ -1333,18 +1332,11 @@ int qib_qp_iter_next(struct qib_qp_iter *iter)
qp = rcu_dereference(dev->qp_table[n]);
pqp = qp;
if (qp) {
if (iter->qp)
atomic_dec(&iter->qp->refcount);
atomic_inc(&qp->refcount);
rcu_read_unlock();
iter->qp = qp;
iter->n = n;
return 0;
}
}
rcu_read_unlock();
if (iter->qp)
atomic_dec(&iter->qp->refcount);
return ret;
}
......
......@@ -52,7 +52,7 @@ static void __qib_release_user_pages(struct page **p, size_t num_pages,
* Call with current->mm->mmap_sem held.
*/
static int __qib_get_user_pages(unsigned long start_page, size_t num_pages,
struct page **p, struct vm_area_struct **vma)
struct page **p)
{
unsigned long lock_limit;
size_t got;
......@@ -69,7 +69,7 @@ static int __qib_get_user_pages(unsigned long start_page, size_t num_pages,
ret = get_user_pages(current, current->mm,
start_page + got * PAGE_SIZE,
num_pages - got, 1, 1,
p + got, vma);
p + got, NULL);
if (ret < 0)
goto bail_release;
}
......@@ -136,7 +136,7 @@ int qib_get_user_pages(unsigned long start_page, size_t num_pages,
down_write(&current->mm->mmap_sem);
ret = __qib_get_user_pages(start_page, num_pages, p, NULL);
ret = __qib_get_user_pages(start_page, num_pages, p);
up_write(&current->mm->mmap_sem);
......
......@@ -529,21 +529,13 @@ void ipoib_mcast_join_task(struct work_struct *work)
port_attr.state);
return;
}
priv->local_lid = port_attr.lid;
if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid))
ipoib_warn(priv, "ib_query_gid() failed\n");
else
memcpy(priv->dev->dev_addr + 4, priv->local_gid.raw, sizeof (union ib_gid));
{
struct ib_port_attr attr;
if (!ib_query_port(priv->ca, priv->port, &attr))
priv->local_lid = attr.lid;
else
ipoib_warn(priv, "ib_query_port failed\n");
}
if (!priv->broadcast) {
struct ipoib_mcast *broadcast;
......
......@@ -344,7 +344,6 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
int is_leading)
{
struct iscsi_conn *conn = cls_conn->dd_data;
struct iscsi_session *session;
struct iser_conn *ib_conn;
struct iscsi_endpoint *ep;
int error;
......@@ -363,9 +362,17 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
}
ib_conn = ep->dd_data;
session = conn->session;
if (iser_alloc_rx_descriptors(ib_conn, session))
return -ENOMEM;
mutex_lock(&ib_conn->state_mutex);
if (ib_conn->state != ISER_CONN_UP) {
error = -EINVAL;
iser_err("iser_conn %p state is %d, teardown started\n",
ib_conn, ib_conn->state);
goto out;
}
error = iser_alloc_rx_descriptors(ib_conn, conn->session);
if (error)
goto out;
/* binds the iSER connection retrieved from the previously
* connected ep_handle to the iSCSI layer connection. exchanges
......@@ -375,7 +382,9 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
conn->dd_data = ib_conn;
ib_conn->iscsi_conn = conn;
return 0;
out:
mutex_unlock(&ib_conn->state_mutex);
return error;
}
static int
......
......@@ -69,7 +69,7 @@
#define DRV_NAME "iser"
#define PFX DRV_NAME ": "
#define DRV_VER "1.4"
#define DRV_VER "1.4.1"
#define iser_dbg(fmt, arg...) \
do { \
......
......@@ -73,7 +73,7 @@ static int iser_create_device_ib_res(struct iser_device *device)
{
struct iser_cq_desc *cq_desc;
struct ib_device_attr *dev_attr = &device->dev_attr;
int ret, i, j;
int ret, i;
ret = ib_query_device(device->ib_device, dev_attr);
if (ret) {
......@@ -125,16 +125,20 @@ static int iser_create_device_ib_res(struct iser_device *device)
iser_cq_event_callback,
(void *)&cq_desc[i],
ISER_MAX_RX_CQ_LEN, i);
if (IS_ERR(device->rx_cq[i]))
if (IS_ERR(device->rx_cq[i])) {
device->rx_cq[i] = NULL;
goto cq_err;
}
device->tx_cq[i] = ib_create_cq(device->ib_device,
NULL, iser_cq_event_callback,
(void *)&cq_desc[i],
ISER_MAX_TX_CQ_LEN, i);
if (IS_ERR(device->tx_cq[i]))
if (IS_ERR(device->tx_cq[i])) {
device->tx_cq[i] = NULL;
goto cq_err;
}
if (ib_req_notify_cq(device->rx_cq[i], IB_CQ_NEXT_COMP))
goto cq_err;
......@@ -160,14 +164,14 @@ static int iser_create_device_ib_res(struct iser_device *device)
handler_err:
ib_dereg_mr(device->mr);
dma_mr_err:
for (j = 0; j < device->cqs_used; j++)
tasklet_kill(&device->cq_tasklet[j]);
for (i = 0; i < device->cqs_used; i++)
tasklet_kill(&device->cq_tasklet[i]);
cq_err:
for (j = 0; j < i; j++) {
if (device->tx_cq[j])
ib_destroy_cq(device->tx_cq[j]);
if (device->rx_cq[j])
ib_destroy_cq(device->rx_cq[j]);
for (i = 0; i < device->cqs_used; i++) {
if (device->tx_cq[i])
ib_destroy_cq(device->tx_cq[i]);
if (device->rx_cq[i])
ib_destroy_cq(device->rx_cq[i]);
}
ib_dealloc_pd(device->pd);
pd_err:
......
......@@ -298,6 +298,7 @@ static int mlx4_HW2SW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox
MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
}
/* Must protect against concurrent access */
int mlx4_mr_hw_get_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr,
struct mlx4_mpt_entry ***mpt_entry)
{
......@@ -305,13 +306,10 @@ int mlx4_mr_hw_get_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr,
int key = key_to_hw_index(mmr->key) & (dev->caps.num_mpts - 1);
struct mlx4_cmd_mailbox *mailbox = NULL;
/* Make sure that at this point we have single-threaded access only */
if (mmr->enabled != MLX4_MPT_EN_HW)
return -EINVAL;
err = mlx4_HW2SW_MPT(dev, NULL, key);
if (err) {
mlx4_warn(dev, "HW2SW_MPT failed (%d).", err);
mlx4_warn(dev, "Most likely the MR has MWs bound to it.\n");
......@@ -333,7 +331,6 @@ int mlx4_mr_hw_get_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr,
0, MLX4_CMD_QUERY_MPT,
MLX4_CMD_TIME_CLASS_B,
MLX4_CMD_WRAPPED);
if (err)
goto free_mailbox;
......@@ -378,9 +375,10 @@ int mlx4_mr_hw_write_mpt(struct mlx4_dev *dev, struct mlx4_mr *mmr,
err = mlx4_SW2HW_MPT(dev, mailbox, key);
}
mmr->pd = be32_to_cpu((*mpt_entry)->pd_flags) & MLX4_MPT_PD_MASK;
if (!err)
if (!err) {
mmr->pd = be32_to_cpu((*mpt_entry)->pd_flags) & MLX4_MPT_PD_MASK;
mmr->enabled = MLX4_MPT_EN_HW;
}
return err;
}
EXPORT_SYMBOL_GPL(mlx4_mr_hw_write_mpt);
......@@ -400,11 +398,12 @@ EXPORT_SYMBOL_GPL(mlx4_mr_hw_put_mpt);
int mlx4_mr_hw_change_pd(struct mlx4_dev *dev, struct mlx4_mpt_entry *mpt_entry,
u32 pdn)
{
u32 pd_flags = be32_to_cpu(mpt_entry->pd_flags);
u32 pd_flags = be32_to_cpu(mpt_entry->pd_flags) & ~MLX4_MPT_PD_MASK;
/* The wrapper function will put the slave's id here */
if (mlx4_is_mfunc(dev))
pd_flags &= ~MLX4_MPT_PD_VF_MASK;
mpt_entry->pd_flags = cpu_to_be32((pd_flags & ~MLX4_MPT_PD_MASK) |
mpt_entry->pd_flags = cpu_to_be32(pd_flags |
(pdn & MLX4_MPT_PD_MASK)
| MLX4_MPT_PD_FLAG_EN_INV);
return 0;
......@@ -600,14 +599,18 @@ int mlx4_mr_rereg_mem_write(struct mlx4_dev *dev, struct mlx4_mr *mr,
{
int err;
mpt_entry->start = cpu_to_be64(mr->iova);
mpt_entry->length = cpu_to_be64(mr->size);
mpt_entry->entity_size = cpu_to_be32(mr->mtt.page_shift);
mpt_entry->start = cpu_to_be64(iova);
mpt_entry->length = cpu_to_be64(size);
mpt_entry->entity_size = cpu_to_be32(page_shift);
err = mlx4_mtt_init(dev, npages, page_shift, &mr->mtt);
if (err)
return err;
mpt_entry->pd_flags &= cpu_to_be32(MLX4_MPT_PD_MASK |
MLX4_MPT_PD_FLAG_EN_INV);
mpt_entry->flags &= cpu_to_be32(MLX4_MPT_FLAG_FREE |
MLX4_MPT_FLAG_SW_OWNS);
if (mr->mtt.order < 0) {
mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL);
mpt_entry->mtt_addr = 0;
......@@ -617,6 +620,14 @@ int mlx4_mr_rereg_mem_write(struct mlx4_dev *dev, struct mlx4_mr *mr,
if (mr->mtt.page_shift == 0)
mpt_entry->mtt_sz = cpu_to_be32(1 << mr->mtt.order);
}
if (mr->mtt.order >= 0 && mr->mtt.page_shift == 0) {
/* fast register MR in free state */
mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_FREE);
mpt_entry->pd_flags |= cpu_to_be32(MLX4_MPT_PD_FLAG_FAST_REG |
MLX4_MPT_PD_FLAG_RAE);
} else {
mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_SW_OWNS);
}
mr->enabled = MLX4_MPT_EN_SW;
return 0;
......
......@@ -103,7 +103,8 @@ static int find_index(struct mlx4_dev *dev,
int i;
for (i = 0; i < MLX4_MAX_MAC_NUM; i++) {
if ((mac & MLX4_MAC_MASK) ==
if (table->refs[i] &&
(MLX4_MAC_MASK & mac) ==
(MLX4_MAC_MASK & be64_to_cpu(table->entries[i])))
return i;
}
......@@ -165,12 +166,14 @@ int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac)
mutex_lock(&table->mutex);
for (i = 0; i < MLX4_MAX_MAC_NUM; i++) {
if (free < 0 && !table->entries[i]) {
free = i;
if (!table->refs[i]) {
if (free < 0)
free = i;
continue;
}
if (mac == (MLX4_MAC_MASK & be64_to_cpu(table->entries[i]))) {
if ((MLX4_MAC_MASK & mac) ==
(MLX4_MAC_MASK & be64_to_cpu(table->entries[i]))) {
/* MAC already registered, increment ref count */
err = i;
++table->refs[i];
......
......@@ -47,6 +47,7 @@ struct ib_umem {
int writable;
int hugetlb;
struct work_struct work;
struct pid *pid;
struct mm_struct *mm;
unsigned long diff;
struct sg_table sg_head;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册