提交 7c034dfd 编写于 作者: L Linus Torvalds

Merge tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband

Pull InfiniBand/RDMA updates from Roland Dreier:

 - IPoIB fixes from Doug Ledford and Erez Shitrit

 - iSER updates from Sagi Grimberg

 - mlx4 GUID handling changes from Yishai Hadas

 - other misc fixes

* tag 'rdma-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/roland/infiniband: (51 commits)
  mlx5: wrong page mask if CONFIG_ARCH_DMA_ADDR_T_64BIT enabled for 32Bit architectures
  IB/iser: Rewrite bounce buffer code path
  IB/iser: Bump version to 1.6
  IB/iser: Remove code duplication for a single DMA entry
  IB/iser: Pass struct iser_mem_reg to iser_fast_reg_mr and iser_reg_sig_mr
  IB/iser: Modify struct iser_mem_reg members
  IB/iser: Make fastreg pool cache friendly
  IB/iser: Move PI context alloc/free to routines
  IB/iser: Move fastreg descriptor pool get/put to helper functions
  IB/iser: Merge build page-vec into register page-vec
  IB/iser: Get rid of struct iser_rdma_regd
  IB/iser: Remove redundant assignments in iser_reg_page_vec
  IB/iser: Move memory reg/dereg routines to iser_memory.c
  IB/iser: Don't pass ib_device to fall_to_bounce_buff routine
  IB/iser: Remove a redundant struct iser_data_buf
  IB/iser: Remove redundant cmd_data_len calculation
  IB/iser: Fix wrong calculation of protection buffer length
  IB/iser: Handle fastreg/local_inv completion errors
  IB/iser: Fix unload during ep_poll wrong dereference
  ib_srpt: convert printk's to pr_* functions
  ...
......@@ -187,8 +187,10 @@ Check RDMA and NFS Setup
To further test the InfiniBand software stack, use IPoIB (this
assumes you have two IB hosts named host1 and host2):
host1$ ifconfig ib0 a.b.c.x
host2$ ifconfig ib0 a.b.c.y
host1$ ip link set dev ib0 up
host1$ ip address add dev ib0 a.b.c.x
host2$ ip link set dev ib0 up
host2$ ip address add dev ib0 a.b.c.y
host1$ ping a.b.c.y
host2$ ping a.b.c.x
......@@ -229,7 +231,8 @@ NFS/RDMA Setup
$ modprobe ib_mthca
$ modprobe ib_ipoib
$ ifconfig ib0 a.b.c.d
$ ip li set dev ib0 up
$ ip addr add dev ib0 a.b.c.d
NOTE: use unique addresses for the client and server
......
......@@ -8803,6 +8803,15 @@ W: http://www.emulex.com
S: Supported
F: drivers/net/ethernet/emulex/benet/
EMULEX ONECONNECT ROCE DRIVER
M: Selvin Xavier <selvin.xavier@emulex.com>
M: Devesh Sharma <devesh.sharma@emulex.com>
M: Mitesh Ahuja <mitesh.ahuja@emulex.com>
L: linux-rdma@vger.kernel.org
W: http://www.emulex.com
S: Supported
F: drivers/infiniband/hw/ocrdma/
SFC NETWORK DRIVER
M: Solarflare linux maintainers <linux-net-drivers@solarflare.com>
M: Shradha Shah <sshah@solarflare.com>
......
......@@ -99,12 +99,15 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
if (dmasync)
dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs);
if (!size)
return ERR_PTR(-EINVAL);
/*
* If the combination of the addr and size requested for this memory
* region causes an integer overflow, return error.
*/
if ((PAGE_ALIGN(addr + size) <= size) ||
(PAGE_ALIGN(addr + size) <= addr))
if (((addr + size) < addr) ||
PAGE_ALIGN(addr + size) < (addr + size))
return ERR_PTR(-EINVAL);
if (!can_do_mlock())
......
......@@ -246,6 +246,17 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
kfree(uqp);
}
list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
struct ib_srq *srq = uobj->object;
struct ib_uevent_object *uevent =
container_of(uobj, struct ib_uevent_object, uobject);
idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
ib_destroy_srq(srq);
ib_uverbs_release_uevent(file, uevent);
kfree(uevent);
}
list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) {
struct ib_cq *cq = uobj->object;
struct ib_uverbs_event_file *ev_file = cq->cq_context;
......@@ -258,17 +269,6 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
kfree(ucq);
}
list_for_each_entry_safe(uobj, tmp, &context->srq_list, list) {
struct ib_srq *srq = uobj->object;
struct ib_uevent_object *uevent =
container_of(uobj, struct ib_uevent_object, uobject);
idr_remove_uobj(&ib_uverbs_srq_idr, uobj);
ib_destroy_srq(srq);
ib_uverbs_release_uevent(file, uevent);
kfree(uevent);
}
list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) {
struct ib_mr *mr = uobj->object;
......
......@@ -1430,6 +1430,10 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
tun_qp->ring[i].addr,
rx_buf_size,
DMA_FROM_DEVICE);
if (ib_dma_mapping_error(ctx->ib_dev, tun_qp->ring[i].map)) {
kfree(tun_qp->ring[i].addr);
goto err;
}
}
for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
......@@ -1442,6 +1446,11 @@ static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
tun_qp->tx_ring[i].buf.addr,
tx_buf_size,
DMA_TO_DEVICE);
if (ib_dma_mapping_error(ctx->ib_dev,
tun_qp->tx_ring[i].buf.map)) {
kfree(tun_qp->tx_ring[i].buf.addr);
goto tx_err;
}
tun_qp->tx_ring[i].ah = NULL;
}
spin_lock_init(&tun_qp->tx_lock);
......
......@@ -66,9 +66,9 @@ MODULE_DESCRIPTION("Mellanox ConnectX HCA InfiniBand driver");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
int mlx4_ib_sm_guid_assign = 1;
int mlx4_ib_sm_guid_assign = 0;
module_param_named(sm_guid_assign, mlx4_ib_sm_guid_assign, int, 0444);
MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 1)");
MODULE_PARM_DESC(sm_guid_assign, "Enable SM alias_GUID assignment if sm_guid_assign > 0 (Default: 0)");
static const char mlx4_ib_version[] =
DRV_NAME ": Mellanox ConnectX InfiniBand driver v"
......@@ -2791,9 +2791,31 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr,
case MLX4_DEV_EVENT_SLAVE_INIT:
/* here, p is the slave id */
do_slave_init(ibdev, p, 1);
if (mlx4_is_master(dev)) {
int i;
for (i = 1; i <= ibdev->num_ports; i++) {
if (rdma_port_get_link_layer(&ibdev->ib_dev, i)
== IB_LINK_LAYER_INFINIBAND)
mlx4_ib_slave_alias_guid_event(ibdev,
p, i,
1);
}
}
return;
case MLX4_DEV_EVENT_SLAVE_SHUTDOWN:
if (mlx4_is_master(dev)) {
int i;
for (i = 1; i <= ibdev->num_ports; i++) {
if (rdma_port_get_link_layer(&ibdev->ib_dev, i)
== IB_LINK_LAYER_INFINIBAND)
mlx4_ib_slave_alias_guid_event(ibdev,
p, i,
0);
}
}
/* here, p is the slave id */
do_slave_init(ibdev, p, 0);
return;
......
......@@ -342,14 +342,9 @@ struct mlx4_ib_ah {
enum mlx4_guid_alias_rec_status {
MLX4_GUID_INFO_STATUS_IDLE,
MLX4_GUID_INFO_STATUS_SET,
MLX4_GUID_INFO_STATUS_PENDING,
};
enum mlx4_guid_alias_rec_ownership {
MLX4_GUID_DRIVER_ASSIGN,
MLX4_GUID_SYSADMIN_ASSIGN,
MLX4_GUID_NONE_ASSIGN, /*init state of each record*/
};
#define GUID_STATE_NEED_PORT_INIT 0x01
enum mlx4_guid_alias_rec_method {
MLX4_GUID_INFO_RECORD_SET = IB_MGMT_METHOD_SET,
......@@ -360,8 +355,8 @@ struct mlx4_sriov_alias_guid_info_rec_det {
u8 all_recs[GUID_REC_SIZE * NUM_ALIAS_GUID_IN_REC];
ib_sa_comp_mask guid_indexes; /*indicates what from the 8 records are valid*/
enum mlx4_guid_alias_rec_status status; /*indicates the administraively status of the record.*/
u8 method; /*set or delete*/
enum mlx4_guid_alias_rec_ownership ownership; /*indicates who assign that alias_guid record*/
unsigned int guids_retry_schedule[NUM_ALIAS_GUID_IN_REC];
u64 time_to_run;
};
struct mlx4_sriov_alias_guid_port_rec_det {
......@@ -369,6 +364,7 @@ struct mlx4_sriov_alias_guid_port_rec_det {
struct workqueue_struct *wq;
struct delayed_work alias_guid_work;
u8 port;
u32 state_flags;
struct mlx4_sriov_alias_guid *parent;
struct list_head cb_list;
};
......@@ -802,6 +798,8 @@ int add_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
void del_sysfs_port_mcg_attr(struct mlx4_ib_dev *device, int port_num,
struct attribute *attr);
ib_sa_comp_mask mlx4_ib_get_aguid_comp_mask_from_ix(int index);
void mlx4_ib_slave_alias_guid_event(struct mlx4_ib_dev *dev, int slave,
int port, int slave_init);
int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *device) ;
......
......@@ -566,6 +566,10 @@ static int alloc_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp)
ib_dma_map_single(dev, qp->sqp_proxy_rcv[i].addr,
sizeof (struct mlx4_ib_proxy_sqp_hdr),
DMA_FROM_DEVICE);
if (ib_dma_mapping_error(dev, qp->sqp_proxy_rcv[i].map)) {
kfree(qp->sqp_proxy_rcv[i].addr);
goto err;
}
}
return 0;
......@@ -2605,8 +2609,7 @@ static int build_lso_seg(struct mlx4_wqe_lso_seg *wqe, struct ib_send_wr *wr,
memcpy(wqe->header, wr->wr.ud.header, wr->wr.ud.hlen);
*lso_hdr_sz = cpu_to_be32((wr->wr.ud.mss - wr->wr.ud.hlen) << 16 |
wr->wr.ud.hlen);
*lso_hdr_sz = cpu_to_be32(wr->wr.ud.mss << 16 | wr->wr.ud.hlen);
*lso_seg_len = halign;
return 0;
}
......
......@@ -46,21 +46,17 @@
static ssize_t show_admin_alias_guid(struct device *dev,
struct device_attribute *attr, char *buf)
{
int record_num;/*0-15*/
int guid_index_in_rec; /*0 - 7*/
struct mlx4_ib_iov_sysfs_attr *mlx4_ib_iov_dentry =
container_of(attr, struct mlx4_ib_iov_sysfs_attr, dentry);
struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
struct mlx4_ib_dev *mdev = port->dev;
__be64 sysadmin_ag_val;
record_num = mlx4_ib_iov_dentry->entry_num / 8 ;
guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8 ;
sysadmin_ag_val = mlx4_get_admin_guid(mdev->dev,
mlx4_ib_iov_dentry->entry_num,
port->num);
return sprintf(buf, "%llx\n",
be64_to_cpu(*(__be64 *)&mdev->sriov.alias_guid.
ports_guid[port->num - 1].
all_rec_per_port[record_num].
all_recs[8 * guid_index_in_rec]));
return sprintf(buf, "%llx\n", be64_to_cpu(sysadmin_ag_val));
}
/* store_admin_alias_guid stores the (new) administratively assigned value of that GUID.
......@@ -80,6 +76,7 @@ static ssize_t store_admin_alias_guid(struct device *dev,
struct mlx4_ib_iov_port *port = mlx4_ib_iov_dentry->ctx;
struct mlx4_ib_dev *mdev = port->dev;
u64 sysadmin_ag_val;
unsigned long flags;
record_num = mlx4_ib_iov_dentry->entry_num / 8;
guid_index_in_rec = mlx4_ib_iov_dentry->entry_num % 8;
......@@ -87,6 +84,7 @@ static ssize_t store_admin_alias_guid(struct device *dev,
pr_err("GUID 0 block 0 is RO\n");
return count;
}
spin_lock_irqsave(&mdev->sriov.alias_guid.ag_work_lock, flags);
sscanf(buf, "%llx", &sysadmin_ag_val);
*(__be64 *)&mdev->sriov.alias_guid.ports_guid[port->num - 1].
all_rec_per_port[record_num].
......@@ -96,33 +94,15 @@ static ssize_t store_admin_alias_guid(struct device *dev,
/* Change the state to be pending for update */
mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].status
= MLX4_GUID_INFO_STATUS_IDLE ;
mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method
= MLX4_GUID_INFO_RECORD_SET;
switch (sysadmin_ag_val) {
case MLX4_GUID_FOR_DELETE_VAL:
mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].method
= MLX4_GUID_INFO_RECORD_DELETE;
mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
= MLX4_GUID_SYSADMIN_ASSIGN;
break;
/* The sysadmin requests the SM to re-assign */
case MLX4_NOT_SET_GUID:
mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
= MLX4_GUID_DRIVER_ASSIGN;
break;
/* The sysadmin requests a specific value.*/
default:
mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].ownership
= MLX4_GUID_SYSADMIN_ASSIGN;
break;
}
mlx4_set_admin_guid(mdev->dev, cpu_to_be64(sysadmin_ag_val),
mlx4_ib_iov_dentry->entry_num,
port->num);
/* set the record index */
mdev->sriov.alias_guid.ports_guid[port->num - 1].all_rec_per_port[record_num].guid_indexes
= mlx4_ib_get_aguid_comp_mask_from_ix(guid_index_in_rec);
|= mlx4_ib_get_aguid_comp_mask_from_ix(guid_index_in_rec);
spin_unlock_irqrestore(&mdev->sriov.alias_guid.ag_work_lock, flags);
mlx4_ib_init_alias_guid_work(mdev, port->num - 1);
return count;
......
......@@ -87,7 +87,6 @@ enum {
IPOIB_FLAG_ADMIN_UP = 2,
IPOIB_PKEY_ASSIGNED = 3,
IPOIB_FLAG_SUBINTERFACE = 5,
IPOIB_MCAST_RUN = 6,
IPOIB_STOP_REAPER = 7,
IPOIB_FLAG_ADMIN_CM = 9,
IPOIB_FLAG_UMCAST = 10,
......@@ -98,9 +97,15 @@ enum {
IPOIB_MCAST_FLAG_FOUND = 0, /* used in set_multicast_list */
IPOIB_MCAST_FLAG_SENDONLY = 1,
IPOIB_MCAST_FLAG_BUSY = 2, /* joining or already joined */
/*
 * For IPOIB_MCAST_FLAG_BUSY:
 * When set, a join is in flight and mcast->mc is unreliable.
 * When clear and mcast->mc is IS_ERR_OR_NULL, the join needs to be
 * restarted or has not been started yet.
 * When clear and mcast->mc is a valid pointer, the join was successful.
 */
IPOIB_MCAST_FLAG_BUSY = 2,
IPOIB_MCAST_FLAG_ATTACHED = 3,
IPOIB_MCAST_JOIN_STARTED = 4,
MAX_SEND_CQE = 16,
IPOIB_CM_COPYBREAK = 256,
......@@ -148,6 +153,7 @@ struct ipoib_mcast {
unsigned long created;
unsigned long backoff;
unsigned long delay_until;
unsigned long flags;
unsigned char logcount;
......@@ -292,6 +298,11 @@ struct ipoib_neigh_table {
struct completion deleted;
};
/*
 * Deferred request to check the state of a device's QP.
 *
 * ipoib_ib_handle_tx_wc() allocates one of these when a send completion
 * fails with a status other than IB_WC_WR_FLUSH_ERR and queues it on
 * priv->wq; ipoib_qp_state_validate_work() frees it when it is done.
 */
struct ipoib_qp_state_validate {
/* work item; the handler recovers this struct via container_of() */
struct work_struct work;
/* device whose priv->qp is to be queried */
struct ipoib_dev_priv *priv;
};
/*
* Device private locking: network stack tx_lock protects members used
* in TX fast path, lock protects everything else. lock nests inside
......@@ -317,6 +328,7 @@ struct ipoib_dev_priv {
struct list_head multicast_list;
struct rb_root multicast_tree;
struct workqueue_struct *wq;
struct delayed_work mcast_task;
struct work_struct carrier_on_task;
struct work_struct flush_light;
......@@ -426,11 +438,6 @@ struct ipoib_neigh {
#define IPOIB_UD_MTU(ib_mtu) (ib_mtu - IPOIB_ENCAP_LEN)
#define IPOIB_UD_BUF_SIZE(ib_mtu) (ib_mtu + IB_GRH_BYTES)
static inline int ipoib_ud_need_sg(unsigned int ib_mtu)
{
return IPOIB_UD_BUF_SIZE(ib_mtu) > PAGE_SIZE;
}
void ipoib_neigh_dtor(struct ipoib_neigh *neigh);
static inline void ipoib_neigh_put(struct ipoib_neigh *neigh)
{
......@@ -477,10 +484,10 @@ void ipoib_ib_dev_flush_heavy(struct work_struct *work);
void ipoib_pkey_event(struct work_struct *work);
void ipoib_ib_dev_cleanup(struct net_device *dev);
int ipoib_ib_dev_open(struct net_device *dev, int flush);
int ipoib_ib_dev_open(struct net_device *dev);
int ipoib_ib_dev_up(struct net_device *dev);
int ipoib_ib_dev_down(struct net_device *dev, int flush);
int ipoib_ib_dev_stop(struct net_device *dev, int flush);
int ipoib_ib_dev_down(struct net_device *dev);
int ipoib_ib_dev_stop(struct net_device *dev);
void ipoib_pkey_dev_check_presence(struct net_device *dev);
int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
......@@ -492,7 +499,7 @@ void ipoib_mcast_send(struct net_device *dev, u8 *daddr, struct sk_buff *skb);
void ipoib_mcast_restart_task(struct work_struct *work);
int ipoib_mcast_start_thread(struct net_device *dev);
int ipoib_mcast_stop_thread(struct net_device *dev, int flush);
int ipoib_mcast_stop_thread(struct net_device *dev);
void ipoib_mcast_dev_down(struct net_device *dev);
void ipoib_mcast_dev_flush(struct net_device *dev);
......
......@@ -474,7 +474,7 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
}
spin_lock_irq(&priv->lock);
queue_delayed_work(ipoib_workqueue,
queue_delayed_work(priv->wq,
&priv->cm.stale_task, IPOIB_CM_RX_DELAY);
/* Add this entry to passive ids list head, but do not re-add it
* if IB_EVENT_QP_LAST_WQE_REACHED has moved it to flush list. */
......@@ -576,7 +576,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
spin_lock_irqsave(&priv->lock, flags);
list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
ipoib_cm_start_rx_drain(priv);
queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
queue_work(priv->wq, &priv->cm.rx_reap_task);
spin_unlock_irqrestore(&priv->lock, flags);
} else
ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
......@@ -603,7 +603,7 @@ void ipoib_cm_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
spin_lock_irqsave(&priv->lock, flags);
list_move(&p->list, &priv->cm.rx_reap_list);
spin_unlock_irqrestore(&priv->lock, flags);
queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
queue_work(priv->wq, &priv->cm.rx_reap_task);
}
return;
}
......@@ -827,7 +827,7 @@ void ipoib_cm_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
list_move(&tx->list, &priv->cm.reap_list);
queue_work(ipoib_workqueue, &priv->cm.reap_task);
queue_work(priv->wq, &priv->cm.reap_task);
}
clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags);
......@@ -1255,7 +1255,7 @@ static int ipoib_cm_tx_handler(struct ib_cm_id *cm_id,
if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
list_move(&tx->list, &priv->cm.reap_list);
queue_work(ipoib_workqueue, &priv->cm.reap_task);
queue_work(priv->wq, &priv->cm.reap_task);
}
spin_unlock_irqrestore(&priv->lock, flags);
......@@ -1284,7 +1284,7 @@ struct ipoib_cm_tx *ipoib_cm_create_tx(struct net_device *dev, struct ipoib_path
tx->dev = dev;
list_add(&tx->list, &priv->cm.start_list);
set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags);
queue_work(ipoib_workqueue, &priv->cm.start_task);
queue_work(priv->wq, &priv->cm.start_task);
return tx;
}
......@@ -1295,7 +1295,7 @@ void ipoib_cm_destroy_tx(struct ipoib_cm_tx *tx)
if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
spin_lock_irqsave(&priv->lock, flags);
list_move(&tx->list, &priv->cm.reap_list);
queue_work(ipoib_workqueue, &priv->cm.reap_task);
queue_work(priv->wq, &priv->cm.reap_task);
ipoib_dbg(priv, "Reap connection for gid %pI6\n",
tx->neigh->daddr + 4);
tx->neigh = NULL;
......@@ -1417,7 +1417,7 @@ void ipoib_cm_skb_too_long(struct net_device *dev, struct sk_buff *skb,
skb_queue_tail(&priv->cm.skb_queue, skb);
if (e)
queue_work(ipoib_workqueue, &priv->cm.skb_task);
queue_work(priv->wq, &priv->cm.skb_task);
}
static void ipoib_cm_rx_reap(struct work_struct *work)
......@@ -1450,7 +1450,7 @@ static void ipoib_cm_stale_task(struct work_struct *work)
}
if (!list_empty(&priv->cm.passive_ids))
queue_delayed_work(ipoib_workqueue,
queue_delayed_work(priv->wq,
&priv->cm.stale_task, IPOIB_CM_RX_DELAY);
spin_unlock_irq(&priv->lock);
}
......
......@@ -94,39 +94,9 @@ void ipoib_free_ah(struct kref *kref)
static void ipoib_ud_dma_unmap_rx(struct ipoib_dev_priv *priv,
u64 mapping[IPOIB_UD_RX_SG])
{
if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
ib_dma_unmap_single(priv->ca, mapping[0], IPOIB_UD_HEAD_SIZE,
DMA_FROM_DEVICE);
ib_dma_unmap_page(priv->ca, mapping[1], PAGE_SIZE,
DMA_FROM_DEVICE);
} else
ib_dma_unmap_single(priv->ca, mapping[0],
IPOIB_UD_BUF_SIZE(priv->max_ib_mtu),
DMA_FROM_DEVICE);
}
static void ipoib_ud_skb_put_frags(struct ipoib_dev_priv *priv,
struct sk_buff *skb,
unsigned int length)
{
if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
unsigned int size;
/*
* There is only two buffers needed for max_payload = 4K,
* first buf size is IPOIB_UD_HEAD_SIZE
*/
skb->tail += IPOIB_UD_HEAD_SIZE;
skb->len += length;
size = length - IPOIB_UD_HEAD_SIZE;
skb_frag_size_set(frag, size);
skb->data_len += size;
skb->truesize += PAGE_SIZE;
} else
skb_put(skb, length);
ib_dma_unmap_single(priv->ca, mapping[0],
IPOIB_UD_BUF_SIZE(priv->max_ib_mtu),
DMA_FROM_DEVICE);
}
static int ipoib_ib_post_receive(struct net_device *dev, int id)
......@@ -156,18 +126,11 @@ static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct sk_buff *skb;
int buf_size;
int tailroom;
u64 *mapping;
if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
buf_size = IPOIB_UD_HEAD_SIZE;
tailroom = 128; /* reserve some tailroom for IP/TCP headers */
} else {
buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
tailroom = 0;
}
buf_size = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
skb = dev_alloc_skb(buf_size + tailroom + 4);
skb = dev_alloc_skb(buf_size + IPOIB_ENCAP_LEN);
if (unlikely(!skb))
return NULL;
......@@ -184,23 +147,8 @@ static struct sk_buff *ipoib_alloc_rx_skb(struct net_device *dev, int id)
if (unlikely(ib_dma_mapping_error(priv->ca, mapping[0])))
goto error;
if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
struct page *page = alloc_page(GFP_ATOMIC);
if (!page)
goto partial_error;
skb_fill_page_desc(skb, 0, page, 0, PAGE_SIZE);
mapping[1] =
ib_dma_map_page(priv->ca, page,
0, PAGE_SIZE, DMA_FROM_DEVICE);
if (unlikely(ib_dma_mapping_error(priv->ca, mapping[1])))
goto partial_error;
}
priv->rx_ring[id].skb = skb;
return skb;
partial_error:
ib_dma_unmap_single(priv->ca, mapping[0], buf_size, DMA_FROM_DEVICE);
error:
dev_kfree_skb_any(skb);
return NULL;
......@@ -278,7 +226,8 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
wc->byte_len, wc->slid);
ipoib_ud_dma_unmap_rx(priv, mapping);
ipoib_ud_skb_put_frags(priv, skb, wc->byte_len);
skb_put(skb, wc->byte_len);
/* First byte of dgid signals multicast when 0xff */
dgid = &((struct ib_grh *)skb->data)->dgid;
......@@ -296,6 +245,8 @@ static void ipoib_ib_handle_rx_wc(struct net_device *dev, struct ib_wc *wc)
skb_reset_mac_header(skb);
skb_pull(skb, IPOIB_ENCAP_LEN);
skb->truesize = SKB_TRUESIZE(skb->len);
++dev->stats.rx_packets;
dev->stats.rx_bytes += skb->len;
......@@ -376,6 +327,51 @@ static void ipoib_dma_unmap_tx(struct ib_device *ca,
}
}
/*
 * As the result of a completion error the QP can be transferred to the
 * SQE state. This work handler checks whether the (send) QP is in the SQE
 * state and, if so, moves it back to the RTS state so that it is
 * functional again.
 *
 * @work: the work member embedded in a struct ipoib_qp_state_validate.
 *
 * A QP found in any other state is left untouched and only logged.
 * The containing struct ipoib_qp_state_validate is freed on every path,
 * so the caller must not touch it after queueing the work.
 */
static void ipoib_qp_state_validate_work(struct work_struct *work)
{
struct ipoib_qp_state_validate *qp_work =
container_of(work, struct ipoib_qp_state_validate, work);
struct ipoib_dev_priv *priv = qp_work->priv;
struct ib_qp_attr qp_attr;
struct ib_qp_init_attr query_init_attr;
int ret;
/* Only IB_QP_STATE is requested; only qp_attr.qp_state is read below. */
ret = ib_query_qp(priv->qp, &qp_attr, IB_QP_STATE, &query_init_attr);
if (ret) {
ipoib_warn(priv, "%s: Failed to query QP ret: %d\n",
__func__, ret);
goto free_res;
}
pr_info("%s: QP: 0x%x is in state: %d\n",
__func__, priv->qp->qp_num, qp_attr.qp_state);
/* Only the SQE->RTS transition is currently supported. */
if (qp_attr.qp_state == IB_QPS_SQE) {
qp_attr.qp_state = IB_QPS_RTS;
ret = ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE);
if (ret) {
pr_warn("failed(%d) modify QP:0x%x SQE->RTS\n",
ret, priv->qp->qp_num);
goto free_res;
}
pr_info("%s: QP: 0x%x moved from IB_QPS_SQE to IB_QPS_RTS\n",
__func__, priv->qp->qp_num);
} else {
pr_warn("QP (%d) will stay in state: %d\n",
priv->qp->qp_num, qp_attr.qp_state);
}
/* Common exit: release the request allocated by the TX completion path. */
free_res:
kfree(qp_work);
}
static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
......@@ -407,10 +403,22 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
netif_wake_queue(dev);
if (wc->status != IB_WC_SUCCESS &&
wc->status != IB_WC_WR_FLUSH_ERR)
wc->status != IB_WC_WR_FLUSH_ERR) {
struct ipoib_qp_state_validate *qp_work;
ipoib_warn(priv, "failed send event "
"(status=%d, wrid=%d vend_err %x)\n",
wc->status, wr_id, wc->vendor_err);
qp_work = kzalloc(sizeof(*qp_work), GFP_ATOMIC);
if (!qp_work) {
ipoib_warn(priv, "%s Failed alloc ipoib_qp_state_validate for qp: 0x%x\n",
__func__, priv->qp->qp_num);
return;
}
INIT_WORK(&qp_work->work, ipoib_qp_state_validate_work);
qp_work->priv = priv;
queue_work(priv->wq, &qp_work->work);
}
}
static int poll_tx(struct ipoib_dev_priv *priv)
......@@ -655,16 +663,33 @@ void ipoib_reap_ah(struct work_struct *work)
__ipoib_reap_ah(dev);
if (!test_bit(IPOIB_STOP_REAPER, &priv->flags))
queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
queue_delayed_work(priv->wq, &priv->ah_reap_task,
round_jiffies_relative(HZ));
}
/*
 * Run an address-handle reap pass right now instead of waiting for the
 * delayed ah_reap_task.
 *
 * Cancels the pending delayed work, flushes the per-device workqueue and
 * then calls ipoib_reap_ah() directly in the caller's context. Note that
 * ipoib_reap_ah() re-queues ah_reap_task unless IPOIB_STOP_REAPER is set,
 * so after this call the periodic reaper is armed again unless the caller
 * set that flag first.
 */
static void ipoib_flush_ah(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
cancel_delayed_work(&priv->ah_reap_task);
flush_workqueue(priv->wq);
ipoib_reap_ah(&priv->ah_reap_task.work);
}
/*
 * Stop the periodic address-handle reaper and do one last reap pass.
 *
 * IPOIB_STOP_REAPER is set before the flush so that the ipoib_reap_ah()
 * call made by ipoib_flush_ah() does not re-queue ah_reap_task.
 */
static void ipoib_stop_ah(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
set_bit(IPOIB_STOP_REAPER, &priv->flags);
ipoib_flush_ah(dev);
}
/*
 * Drain the TX completion queue of a device.
 *
 * @ctx: a struct net_device pointer carried as an unsigned long.
 *
 * NOTE(review): by its name and signature this is presumably installed as
 * a timer callback; the timer setup is not visible here -- confirm.
 */
static void ipoib_ib_tx_timer_func(unsigned long ctx)
{
drain_tx_cq((struct net_device *)ctx);
}
int ipoib_ib_dev_open(struct net_device *dev, int flush)
int ipoib_ib_dev_open(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
int ret;
......@@ -696,7 +721,7 @@ int ipoib_ib_dev_open(struct net_device *dev, int flush)
}
clear_bit(IPOIB_STOP_REAPER, &priv->flags);
queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
queue_delayed_work(priv->wq, &priv->ah_reap_task,
round_jiffies_relative(HZ));
if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
......@@ -706,7 +731,7 @@ int ipoib_ib_dev_open(struct net_device *dev, int flush)
dev_stop:
if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
napi_enable(&priv->napi);
ipoib_ib_dev_stop(dev, flush);
ipoib_ib_dev_stop(dev);
return -1;
}
......@@ -738,7 +763,7 @@ int ipoib_ib_dev_up(struct net_device *dev)
return ipoib_mcast_start_thread(dev);
}
int ipoib_ib_dev_down(struct net_device *dev, int flush)
int ipoib_ib_dev_down(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
......@@ -747,7 +772,7 @@ int ipoib_ib_dev_down(struct net_device *dev, int flush)
clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
netif_carrier_off(dev);
ipoib_mcast_stop_thread(dev, flush);
ipoib_mcast_stop_thread(dev);
ipoib_mcast_dev_flush(dev);
ipoib_flush_paths(dev);
......@@ -807,7 +832,7 @@ void ipoib_drain_cq(struct net_device *dev)
local_bh_enable();
}
int ipoib_ib_dev_stop(struct net_device *dev, int flush)
int ipoib_ib_dev_stop(struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ib_qp_attr qp_attr;
......@@ -877,24 +902,7 @@ int ipoib_ib_dev_stop(struct net_device *dev, int flush)
if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
ipoib_warn(priv, "Failed to modify QP to RESET state\n");
/* Wait for all AHs to be reaped */
set_bit(IPOIB_STOP_REAPER, &priv->flags);
cancel_delayed_work(&priv->ah_reap_task);
if (flush)
flush_workqueue(ipoib_workqueue);
begin = jiffies;
while (!list_empty(&priv->dead_ahs)) {
__ipoib_reap_ah(dev);
if (time_after(jiffies, begin + HZ)) {
ipoib_warn(priv, "timing out; will leak address handles\n");
break;
}
msleep(1);
}
ipoib_flush_ah(dev);
ib_req_notify_cq(priv->recv_cq, IB_CQ_NEXT_COMP);
......@@ -918,7 +926,7 @@ int ipoib_ib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
(unsigned long) dev);
if (dev->flags & IFF_UP) {
if (ipoib_ib_dev_open(dev, 1)) {
if (ipoib_ib_dev_open(dev)) {
ipoib_transport_dev_cleanup(dev);
return -ENODEV;
}
......@@ -1037,15 +1045,16 @@ static void __ipoib_ib_dev_flush(struct ipoib_dev_priv *priv,
if (level == IPOIB_FLUSH_LIGHT) {
ipoib_mark_paths_invalid(dev);
ipoib_mcast_dev_flush(dev);
ipoib_flush_ah(dev);
}
if (level >= IPOIB_FLUSH_NORMAL)
ipoib_ib_dev_down(dev, 0);
ipoib_ib_dev_down(dev);
if (level == IPOIB_FLUSH_HEAVY) {
if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
ipoib_ib_dev_stop(dev, 0);
if (ipoib_ib_dev_open(dev, 0) != 0)
ipoib_ib_dev_stop(dev);
if (ipoib_ib_dev_open(dev) != 0)
return;
if (netif_queue_stopped(dev))
netif_start_queue(dev);
......@@ -1097,9 +1106,17 @@ void ipoib_ib_dev_cleanup(struct net_device *dev)
*/
ipoib_flush_paths(dev);
ipoib_mcast_stop_thread(dev, 1);
ipoib_mcast_stop_thread(dev);
ipoib_mcast_dev_flush(dev);
/*
 * None of our ah references are freed until after
 * ipoib_mcast_dev_flush() and ipoib_flush_paths() have run and the
 * neighbor garbage collection has been stopped and reaped.
 * That should all be done now, so make a final ah flush.
 */
ipoib_stop_ah(dev);
ipoib_transport_dev_cleanup(dev);
}
......
......@@ -108,7 +108,7 @@ int ipoib_open(struct net_device *dev)
set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
if (ipoib_ib_dev_open(dev, 1)) {
if (ipoib_ib_dev_open(dev)) {
if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
return 0;
goto err_disable;
......@@ -139,7 +139,7 @@ int ipoib_open(struct net_device *dev)
return 0;
err_stop:
ipoib_ib_dev_stop(dev, 1);
ipoib_ib_dev_stop(dev);
err_disable:
clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
......@@ -157,8 +157,8 @@ static int ipoib_stop(struct net_device *dev)
netif_stop_queue(dev);
ipoib_ib_dev_down(dev, 1);
ipoib_ib_dev_stop(dev, 0);
ipoib_ib_dev_down(dev);
ipoib_ib_dev_stop(dev);
if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
struct ipoib_dev_priv *cpriv;
......@@ -640,8 +640,10 @@ static void neigh_add_path(struct sk_buff *skb, u8 *daddr,
if (!path->query && path_rec_start(dev, path))
goto err_path;
__skb_queue_tail(&neigh->queue, skb);
if (skb_queue_len(&neigh->queue) < IPOIB_MAX_PATH_REC_QUEUE)
__skb_queue_tail(&neigh->queue, skb);
else
goto err_drop;
}
spin_unlock_irqrestore(&priv->lock, flags);
......@@ -676,7 +678,12 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
new_path = 1;
}
if (path) {
__skb_queue_tail(&path->queue, skb);
if (skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
__skb_queue_tail(&path->queue, skb);
} else {
++dev->stats.tx_dropped;
dev_kfree_skb_any(skb);
}
if (!path->query && path_rec_start(dev, path)) {
spin_unlock_irqrestore(&priv->lock, flags);
......@@ -839,7 +846,7 @@ static void ipoib_set_mcast_list(struct net_device *dev)
return;
}
queue_work(ipoib_workqueue, &priv->restart_task);
queue_work(priv->wq, &priv->restart_task);
}
static int ipoib_get_iflink(const struct net_device *dev)
......@@ -966,7 +973,7 @@ static void ipoib_reap_neigh(struct work_struct *work)
__ipoib_reap_neigh(priv);
if (!test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
queue_delayed_work(priv->wq, &priv->neigh_reap_task,
arp_tbl.gc_interval);
}
......@@ -1145,7 +1152,7 @@ static int ipoib_neigh_hash_init(struct ipoib_dev_priv *priv)
/* start garbage collection */
clear_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
queue_delayed_work(priv->wq, &priv->neigh_reap_task,
arp_tbl.gc_interval);
return 0;
......@@ -1274,15 +1281,13 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
if (ipoib_neigh_hash_init(priv) < 0)
goto out;
/* Allocate RX/TX "rings" to hold queued skbs */
priv->rx_ring = kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
GFP_KERNEL);
if (!priv->rx_ring) {
printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
ca->name, ipoib_recvq_size);
goto out_neigh_hash_cleanup;
goto out;
}
priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring);
......@@ -1297,16 +1302,24 @@ int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port)
if (ipoib_ib_dev_init(dev, ca, port))
goto out_tx_ring_cleanup;
/*
* Must be after ipoib_ib_dev_init so we can allocate a per
* device wq there and use it here
*/
if (ipoib_neigh_hash_init(priv) < 0)
goto out_dev_uninit;
return 0;
out_dev_uninit:
ipoib_ib_dev_cleanup(dev);
out_tx_ring_cleanup:
vfree(priv->tx_ring);
out_rx_ring_cleanup:
kfree(priv->rx_ring);
out_neigh_hash_cleanup:
ipoib_neigh_hash_uninit(dev);
out:
return -ENOMEM;
}
......@@ -1329,6 +1342,12 @@ void ipoib_dev_cleanup(struct net_device *dev)
}
unregister_netdevice_many(&head);
/*
* Must be before ipoib_ib_dev_cleanup or we delete an in use
* work queue
*/
ipoib_neigh_hash_uninit(dev);
ipoib_ib_dev_cleanup(dev);
kfree(priv->rx_ring);
......@@ -1336,8 +1355,6 @@ void ipoib_dev_cleanup(struct net_device *dev)
priv->rx_ring = NULL;
priv->tx_ring = NULL;
ipoib_neigh_hash_uninit(dev);
}
static const struct header_ops ipoib_header_ops = {
......@@ -1646,10 +1663,11 @@ static struct net_device *ipoib_add_port(const char *format,
register_failed:
ib_unregister_event_handler(&priv->event_handler);
flush_workqueue(ipoib_workqueue);
/* Stop GC if started before flush */
set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
cancel_delayed_work(&priv->neigh_reap_task);
flush_workqueue(ipoib_workqueue);
flush_workqueue(priv->wq);
event_failed:
ipoib_dev_cleanup(priv->dev);
......@@ -1712,6 +1730,7 @@ static void ipoib_remove_one(struct ib_device *device)
list_for_each_entry_safe(priv, tmp, dev_list, list) {
ib_unregister_event_handler(&priv->event_handler);
flush_workqueue(ipoib_workqueue);
rtnl_lock();
dev_change_flags(priv->dev, priv->dev->flags & ~IFF_UP);
......@@ -1720,7 +1739,7 @@ static void ipoib_remove_one(struct ib_device *device)
/* Stop GC */
set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
cancel_delayed_work(&priv->neigh_reap_task);
flush_workqueue(ipoib_workqueue);
flush_workqueue(priv->wq);
unregister_netdev(priv->dev);
free_netdev(priv->dev);
......@@ -1755,14 +1774,16 @@ static int __init ipoib_init_module(void)
return ret;
/*
* We create our own workqueue mainly because we want to be
* able to flush it when devices are being removed. We can't
* use schedule_work()/flush_scheduled_work() because both
* unregister_netdev() and linkwatch_event take the rtnl lock,
* so flush_scheduled_work() can deadlock during device
* removal.
* We create a global workqueue here that is used for all flush
* operations. However, if you attempt to flush a workqueue
* from a task on that same workqueue, it deadlocks the system.
* We want to be able to flush the tasks associated with a
* specific net device, so we also create a workqueue for each
* netdevice. We queue up the tasks for that device only on
* its private workqueue, and we only queue up flush events
* on our global flush workqueue. This avoids the deadlocks.
*/
ipoib_workqueue = create_singlethread_workqueue("ipoib");
ipoib_workqueue = create_singlethread_workqueue("ipoib_flush");
if (!ipoib_workqueue) {
ret = -ENOMEM;
goto err_fs;
......
......@@ -157,6 +157,16 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
goto out_free_pd;
}
/*
* the various IPoIB tasks assume they will never race against
* themselves, so always use a single thread workqueue
*/
priv->wq = create_singlethread_workqueue("ipoib_wq");
if (!priv->wq) {
printk(KERN_WARNING "ipoib: failed to allocate device WQ\n");
goto out_free_mr;
}
size = ipoib_recvq_size + 1;
ret = ipoib_cm_dev_init(dev);
if (!ret) {
......@@ -165,12 +175,13 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
size += ipoib_recvq_size + 1; /* 1 extra for rx_drain_qp */
else
size += ipoib_recvq_size * ipoib_max_conn_qp;
}
} else
goto out_free_wq;
priv->recv_cq = ib_create_cq(priv->ca, ipoib_ib_completion, NULL, dev, size, 0);
if (IS_ERR(priv->recv_cq)) {
printk(KERN_WARNING "%s: failed to create receive CQ\n", ca->name);
goto out_free_mr;
goto out_cm_dev_cleanup;
}
priv->send_cq = ib_create_cq(priv->ca, ipoib_send_comp_handler, NULL,
......@@ -216,15 +227,10 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
priv->tx_wr.send_flags = IB_SEND_SIGNALED;
priv->rx_sge[0].lkey = priv->mr->lkey;
if (ipoib_ud_need_sg(priv->max_ib_mtu)) {
priv->rx_sge[0].length = IPOIB_UD_HEAD_SIZE;
priv->rx_sge[1].length = PAGE_SIZE;
priv->rx_sge[1].lkey = priv->mr->lkey;
priv->rx_wr.num_sge = IPOIB_UD_RX_SG;
} else {
priv->rx_sge[0].length = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
priv->rx_wr.num_sge = 1;
}
priv->rx_sge[0].length = IPOIB_UD_BUF_SIZE(priv->max_ib_mtu);
priv->rx_wr.num_sge = 1;
priv->rx_wr.next = NULL;
priv->rx_wr.sg_list = priv->rx_sge;
......@@ -236,12 +242,19 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
out_free_recv_cq:
ib_destroy_cq(priv->recv_cq);
out_cm_dev_cleanup:
ipoib_cm_dev_cleanup(dev);
out_free_wq:
destroy_workqueue(priv->wq);
priv->wq = NULL;
out_free_mr:
ib_dereg_mr(priv->mr);
ipoib_cm_dev_cleanup(dev);
out_free_pd:
ib_dealloc_pd(priv->pd);
return -ENODEV;
}
......@@ -265,11 +278,18 @@ void ipoib_transport_dev_cleanup(struct net_device *dev)
ipoib_cm_dev_cleanup(dev);
if (priv->wq) {
flush_workqueue(priv->wq);
destroy_workqueue(priv->wq);
priv->wq = NULL;
}
if (ib_dereg_mr(priv->mr))
ipoib_warn(priv, "ib_dereg_mr failed\n");
if (ib_dealloc_pd(priv->pd))
ipoib_warn(priv, "ib_dealloc_pd failed\n");
}
void ipoib_event(struct ib_event_handler *handler,
......
......@@ -69,7 +69,7 @@
#define DRV_NAME "iser"
#define PFX DRV_NAME ": "
#define DRV_VER "1.5"
#define DRV_VER "1.6"
#define iser_dbg(fmt, arg...) \
do { \
......@@ -218,22 +218,21 @@ enum iser_data_dir {
/**
* struct iser_data_buf - iSER data buffer
*
* @buf: pointer to the sg list
* @sg: pointer to the sg list
* @size: num entries of this sg
* @data_len: total buffer byte len
* @dma_nents: returned by dma_map_sg
* @copy_buf: allocated copy buf for SGs unaligned
* for rdma which are copied
* @sg_single: SG-ified clone of a non SG SC or
* unaligned SG
* @orig_sg: pointer to the original sg list (in case
* we used a copy)
* @orig_size: num entries of orig sg list
*/
struct iser_data_buf {
void *buf;
struct scatterlist *sg;
unsigned int size;
unsigned long data_len;
unsigned int dma_nents;
char *copy_buf;
struct scatterlist sg_single;
struct scatterlist *orig_sg;
unsigned int orig_size;
};
/* fwd declarations */
......@@ -244,35 +243,14 @@ struct iscsi_endpoint;
/**
* struct iser_mem_reg - iSER memory registration info
*
* @lkey: MR local key
* @rkey: MR remote key
* @va: MR start address (buffer va)
* @len: MR length
* @sge: memory region sg element
* @rkey: memory region remote key
* @mem_h: pointer to registration context (FMR/Fastreg)
*/
struct iser_mem_reg {
u32 lkey;
u32 rkey;
u64 va;
u64 len;
void *mem_h;
};
/**
* struct iser_regd_buf - iSER buffer registration desc
*
* @reg: memory registration info
* @virt_addr: virtual address of buffer
* @device: reference to iser device
* @direction: dma direction (for dma_unmap)
* @data_size: data buffer size in bytes
*/
struct iser_regd_buf {
struct iser_mem_reg reg;
void *virt_addr;
struct iser_device *device;
enum dma_data_direction direction;
unsigned int data_size;
struct ib_sge sge;
u32 rkey;
void *mem_h;
};
enum iser_desc_type {
......@@ -534,11 +512,9 @@ struct iser_conn {
* @sc: link to scsi command
* @command_sent: indicate if command was sent
* @dir: iser data direction
* @rdma_regd: task rdma registration desc
* @rdma_reg: task rdma registration desc
* @data: iser data buffer desc
* @data_copy: iser data copy buffer desc (bounce buffer)
* @prot: iser protection buffer desc
* @prot_copy: iser protection copy buffer desc (bounce buffer)
*/
struct iscsi_iser_task {
struct iser_tx_desc desc;
......@@ -547,11 +523,9 @@ struct iscsi_iser_task {
struct scsi_cmnd *sc;
int command_sent;
int dir[ISER_DIRS_NUM];
struct iser_regd_buf rdma_regd[ISER_DIRS_NUM];
struct iser_mem_reg rdma_reg[ISER_DIRS_NUM];
struct iser_data_buf data[ISER_DIRS_NUM];
struct iser_data_buf data_copy[ISER_DIRS_NUM];
struct iser_data_buf prot[ISER_DIRS_NUM];
struct iser_data_buf prot_copy[ISER_DIRS_NUM];
};
struct iser_page_vec {
......@@ -621,7 +595,6 @@ void iser_free_rx_descriptors(struct iser_conn *iser_conn);
void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
struct iser_data_buf *mem,
struct iser_data_buf *mem_copy,
enum iser_data_dir cmd_dir);
int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task,
......@@ -634,10 +607,6 @@ int iser_connect(struct iser_conn *iser_conn,
struct sockaddr *dst_addr,
int non_blocking);
int iser_reg_page_vec(struct ib_conn *ib_conn,
struct iser_page_vec *page_vec,
struct iser_mem_reg *mem_reg);
void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir);
void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
......@@ -667,4 +636,9 @@ int iser_create_fastreg_pool(struct ib_conn *ib_conn, unsigned cmds_max);
void iser_free_fastreg_pool(struct ib_conn *ib_conn);
u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir, sector_t *sector);
struct fast_reg_descriptor *
iser_reg_desc_get(struct ib_conn *ib_conn);
void
iser_reg_desc_put(struct ib_conn *ib_conn,
struct fast_reg_descriptor *desc);
#endif
......@@ -50,7 +50,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task)
{
struct iscsi_iser_task *iser_task = task->dd_data;
struct iser_device *device = iser_task->iser_conn->ib_conn.device;
struct iser_regd_buf *regd_buf;
struct iser_mem_reg *mem_reg;
int err;
struct iser_hdr *hdr = &iser_task->desc.iser_header;
struct iser_data_buf *buf_in = &iser_task->data[ISER_DIR_IN];
......@@ -78,15 +78,15 @@ static int iser_prepare_read_cmd(struct iscsi_task *task)
iser_err("Failed to set up Data-IN RDMA\n");
return err;
}
regd_buf = &iser_task->rdma_regd[ISER_DIR_IN];
mem_reg = &iser_task->rdma_reg[ISER_DIR_IN];
hdr->flags |= ISER_RSV;
hdr->read_stag = cpu_to_be32(regd_buf->reg.rkey);
hdr->read_va = cpu_to_be64(regd_buf->reg.va);
hdr->read_stag = cpu_to_be32(mem_reg->rkey);
hdr->read_va = cpu_to_be64(mem_reg->sge.addr);
iser_dbg("Cmd itt:%d READ tags RKEY:%#.4X VA:%#llX\n",
task->itt, regd_buf->reg.rkey,
(unsigned long long)regd_buf->reg.va);
task->itt, mem_reg->rkey,
(unsigned long long)mem_reg->sge.addr);
return 0;
}
......@@ -104,7 +104,7 @@ iser_prepare_write_cmd(struct iscsi_task *task,
{
struct iscsi_iser_task *iser_task = task->dd_data;
struct iser_device *device = iser_task->iser_conn->ib_conn.device;
struct iser_regd_buf *regd_buf;
struct iser_mem_reg *mem_reg;
int err;
struct iser_hdr *hdr = &iser_task->desc.iser_header;
struct iser_data_buf *buf_out = &iser_task->data[ISER_DIR_OUT];
......@@ -134,25 +134,25 @@ iser_prepare_write_cmd(struct iscsi_task *task,
return err;
}
regd_buf = &iser_task->rdma_regd[ISER_DIR_OUT];
mem_reg = &iser_task->rdma_reg[ISER_DIR_OUT];
if (unsol_sz < edtl) {
hdr->flags |= ISER_WSV;
hdr->write_stag = cpu_to_be32(regd_buf->reg.rkey);
hdr->write_va = cpu_to_be64(regd_buf->reg.va + unsol_sz);
hdr->write_stag = cpu_to_be32(mem_reg->rkey);
hdr->write_va = cpu_to_be64(mem_reg->sge.addr + unsol_sz);
iser_dbg("Cmd itt:%d, WRITE tags, RKEY:%#.4X "
"VA:%#llX + unsol:%d\n",
task->itt, regd_buf->reg.rkey,
(unsigned long long)regd_buf->reg.va, unsol_sz);
task->itt, mem_reg->rkey,
(unsigned long long)mem_reg->sge.addr, unsol_sz);
}
if (imm_sz > 0) {
iser_dbg("Cmd itt:%d, WRITE, adding imm.data sz: %d\n",
task->itt, imm_sz);
tx_dsg->addr = regd_buf->reg.va;
tx_dsg->addr = mem_reg->sge.addr;
tx_dsg->length = imm_sz;
tx_dsg->lkey = regd_buf->reg.lkey;
tx_dsg->lkey = mem_reg->sge.lkey;
iser_task->desc.num_sge = 2;
}
......@@ -401,16 +401,16 @@ int iser_send_command(struct iscsi_conn *conn,
}
if (scsi_sg_count(sc)) { /* using a scatter list */
data_buf->buf = scsi_sglist(sc);
data_buf->sg = scsi_sglist(sc);
data_buf->size = scsi_sg_count(sc);
}
data_buf->data_len = scsi_bufflen(sc);
if (scsi_prot_sg_count(sc)) {
prot_buf->buf = scsi_prot_sglist(sc);
prot_buf->sg = scsi_prot_sglist(sc);
prot_buf->size = scsi_prot_sg_count(sc);
prot_buf->data_len = data_buf->data_len >>
ilog2(sc->device->sector_size) * 8;
prot_buf->data_len = (data_buf->data_len >>
ilog2(sc->device->sector_size)) * 8;
}
if (hdr->flags & ISCSI_FLAG_CMD_READ) {
......@@ -450,7 +450,7 @@ int iser_send_data_out(struct iscsi_conn *conn,
struct iser_conn *iser_conn = conn->dd_data;
struct iscsi_iser_task *iser_task = task->dd_data;
struct iser_tx_desc *tx_desc = NULL;
struct iser_regd_buf *regd_buf;
struct iser_mem_reg *mem_reg;
unsigned long buf_offset;
unsigned long data_seg_len;
uint32_t itt;
......@@ -477,11 +477,11 @@ int iser_send_data_out(struct iscsi_conn *conn,
/* build the tx desc */
iser_initialize_task_headers(task, tx_desc);
regd_buf = &iser_task->rdma_regd[ISER_DIR_OUT];
mem_reg = &iser_task->rdma_reg[ISER_DIR_OUT];
tx_dsg = &tx_desc->tx_sg[1];
tx_dsg->addr = regd_buf->reg.va + buf_offset;
tx_dsg->length = data_seg_len;
tx_dsg->lkey = regd_buf->reg.lkey;
tx_dsg->addr = mem_reg->sge.addr + buf_offset;
tx_dsg->length = data_seg_len;
tx_dsg->lkey = mem_reg->sge.lkey;
tx_desc->num_sge = 2;
if (buf_offset + data_seg_len > iser_task->data[ISER_DIR_OUT].data_len) {
......@@ -658,10 +658,10 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
iser_task->prot[ISER_DIR_IN].data_len = 0;
iser_task->prot[ISER_DIR_OUT].data_len = 0;
memset(&iser_task->rdma_regd[ISER_DIR_IN], 0,
sizeof(struct iser_regd_buf));
memset(&iser_task->rdma_regd[ISER_DIR_OUT], 0,
sizeof(struct iser_regd_buf));
memset(&iser_task->rdma_reg[ISER_DIR_IN], 0,
sizeof(struct iser_mem_reg));
memset(&iser_task->rdma_reg[ISER_DIR_OUT], 0,
sizeof(struct iser_mem_reg));
}
void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
......@@ -674,35 +674,31 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
/* if we were reading, copy back to unaligned sglist,
* anyway dma_unmap and free the copy
*/
if (iser_task->data_copy[ISER_DIR_IN].copy_buf != NULL) {
if (iser_task->data[ISER_DIR_IN].orig_sg) {
is_rdma_data_aligned = 0;
iser_finalize_rdma_unaligned_sg(iser_task,
&iser_task->data[ISER_DIR_IN],
&iser_task->data_copy[ISER_DIR_IN],
ISER_DIR_IN);
}
if (iser_task->data_copy[ISER_DIR_OUT].copy_buf != NULL) {
if (iser_task->data[ISER_DIR_OUT].orig_sg) {
is_rdma_data_aligned = 0;
iser_finalize_rdma_unaligned_sg(iser_task,
&iser_task->data[ISER_DIR_OUT],
&iser_task->data_copy[ISER_DIR_OUT],
ISER_DIR_OUT);
}
if (iser_task->prot_copy[ISER_DIR_IN].copy_buf != NULL) {
if (iser_task->prot[ISER_DIR_IN].orig_sg) {
is_rdma_prot_aligned = 0;
iser_finalize_rdma_unaligned_sg(iser_task,
&iser_task->prot[ISER_DIR_IN],
&iser_task->prot_copy[ISER_DIR_IN],
ISER_DIR_IN);
}
if (iser_task->prot_copy[ISER_DIR_OUT].copy_buf != NULL) {
if (iser_task->prot[ISER_DIR_OUT].orig_sg) {
is_rdma_prot_aligned = 0;
iser_finalize_rdma_unaligned_sg(iser_task,
&iser_task->prot[ISER_DIR_OUT],
&iser_task->prot_copy[ISER_DIR_OUT],
ISER_DIR_OUT);
}
......
......@@ -273,6 +273,65 @@ void iser_free_fmr_pool(struct ib_conn *ib_conn)
ib_conn->fmr.page_vec = NULL;
}
static int
iser_alloc_pi_ctx(struct ib_device *ib_device, struct ib_pd *pd,
struct fast_reg_descriptor *desc)
{
struct iser_pi_context *pi_ctx = NULL;
struct ib_mr_init_attr mr_init_attr = {.max_reg_descriptors = 2,
.flags = IB_MR_SIGNATURE_EN};
int ret = 0;
desc->pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL);
if (!desc->pi_ctx)
return -ENOMEM;
pi_ctx = desc->pi_ctx;
pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(ib_device,
ISCSI_ISER_SG_TABLESIZE);
if (IS_ERR(pi_ctx->prot_frpl)) {
ret = PTR_ERR(pi_ctx->prot_frpl);
goto prot_frpl_failure;
}
pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd,
ISCSI_ISER_SG_TABLESIZE + 1);
if (IS_ERR(pi_ctx->prot_mr)) {
ret = PTR_ERR(pi_ctx->prot_mr);
goto prot_mr_failure;
}
desc->reg_indicators |= ISER_PROT_KEY_VALID;
pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr);
if (IS_ERR(pi_ctx->sig_mr)) {
ret = PTR_ERR(pi_ctx->sig_mr);
goto sig_mr_failure;
}
desc->reg_indicators |= ISER_SIG_KEY_VALID;
desc->reg_indicators &= ~ISER_FASTREG_PROTECTED;
return 0;
sig_mr_failure:
ib_dereg_mr(desc->pi_ctx->prot_mr);
prot_mr_failure:
ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl);
prot_frpl_failure:
kfree(desc->pi_ctx);
return ret;
}
/*
 * iser_free_pi_ctx - release a protection-information context
 * @pi_ctx: context to destroy; dereferenced unconditionally, so it must
 *          not be NULL
 *
 * Frees the protection fast-reg page list, deregisters the protection MR,
 * destroys the signature MR and finally frees the context structure itself.
 * These are the same members that iser_alloc_pi_ctx() populates.
 * The caller's own pointer to the context is not modified here.
 */
static void
iser_free_pi_ctx(struct iser_pi_context *pi_ctx)
{
ib_free_fast_reg_page_list(pi_ctx->prot_frpl);
ib_dereg_mr(pi_ctx->prot_mr);
ib_destroy_mr(pi_ctx->sig_mr);
/* all members are released; now drop the container */
kfree(pi_ctx);
}
static int
iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd,
bool pi_enable, struct fast_reg_descriptor *desc)
......@@ -297,59 +356,12 @@ iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd,
desc->reg_indicators |= ISER_DATA_KEY_VALID;
if (pi_enable) {
struct ib_mr_init_attr mr_init_attr = {0};
struct iser_pi_context *pi_ctx = NULL;
desc->pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL);
if (!desc->pi_ctx) {
iser_err("Failed to allocate pi context\n");
ret = -ENOMEM;
ret = iser_alloc_pi_ctx(ib_device, pd, desc);
if (ret)
goto pi_ctx_alloc_failure;
}
pi_ctx = desc->pi_ctx;
pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(ib_device,
ISCSI_ISER_SG_TABLESIZE);
if (IS_ERR(pi_ctx->prot_frpl)) {
ret = PTR_ERR(pi_ctx->prot_frpl);
iser_err("Failed to allocate prot frpl ret=%d\n",
ret);
goto prot_frpl_failure;
}
pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd,
ISCSI_ISER_SG_TABLESIZE + 1);
if (IS_ERR(pi_ctx->prot_mr)) {
ret = PTR_ERR(pi_ctx->prot_mr);
iser_err("Failed to allocate prot frmr ret=%d\n",
ret);
goto prot_mr_failure;
}
desc->reg_indicators |= ISER_PROT_KEY_VALID;
mr_init_attr.max_reg_descriptors = 2;
mr_init_attr.flags |= IB_MR_SIGNATURE_EN;
pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr);
if (IS_ERR(pi_ctx->sig_mr)) {
ret = PTR_ERR(pi_ctx->sig_mr);
iser_err("Failed to allocate signature enabled mr err=%d\n",
ret);
goto sig_mr_failure;
}
desc->reg_indicators |= ISER_SIG_KEY_VALID;
}
desc->reg_indicators &= ~ISER_FASTREG_PROTECTED;
iser_dbg("Create fr_desc %p page_list %p\n",
desc, desc->data_frpl->page_list);
return 0;
sig_mr_failure:
ib_dereg_mr(desc->pi_ctx->prot_mr);
prot_mr_failure:
ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl);
prot_frpl_failure:
kfree(desc->pi_ctx);
pi_ctx_alloc_failure:
ib_dereg_mr(desc->data_mr);
fast_reg_mr_failure:
......@@ -416,12 +428,8 @@ void iser_free_fastreg_pool(struct ib_conn *ib_conn)
list_del(&desc->list);
ib_free_fast_reg_page_list(desc->data_frpl);
ib_dereg_mr(desc->data_mr);
if (desc->pi_ctx) {
ib_free_fast_reg_page_list(desc->pi_ctx->prot_frpl);
ib_dereg_mr(desc->pi_ctx->prot_mr);
ib_destroy_mr(desc->pi_ctx->sig_mr);
kfree(desc->pi_ctx);
}
if (desc->pi_ctx)
iser_free_pi_ctx(desc->pi_ctx);
kfree(desc);
++i;
}
......@@ -721,7 +729,7 @@ static void iser_connect_error(struct rdma_cm_id *cma_id)
struct iser_conn *iser_conn;
iser_conn = (struct iser_conn *)cma_id->context;
iser_conn->state = ISER_CONN_DOWN;
iser_conn->state = ISER_CONN_TERMINATING;
}
/**
......@@ -992,93 +1000,6 @@ int iser_connect(struct iser_conn *iser_conn,
return err;
}
/**
* iser_reg_page_vec - Register physical memory
*
* returns: 0 on success, errno code on failure
*/
int iser_reg_page_vec(struct ib_conn *ib_conn,
struct iser_page_vec *page_vec,
struct iser_mem_reg *mem_reg)
{
struct ib_pool_fmr *mem;
u64 io_addr;
u64 *page_list;
int status;
page_list = page_vec->pages;
io_addr = page_list[0];
mem = ib_fmr_pool_map_phys(ib_conn->fmr.pool,
page_list,
page_vec->length,
io_addr);
if (IS_ERR(mem)) {
status = (int)PTR_ERR(mem);
iser_err("ib_fmr_pool_map_phys failed: %d\n", status);
return status;
}
mem_reg->lkey = mem->fmr->lkey;
mem_reg->rkey = mem->fmr->rkey;
mem_reg->len = page_vec->length * SIZE_4K;
mem_reg->va = io_addr;
mem_reg->mem_h = (void *)mem;
mem_reg->va += page_vec->offset;
mem_reg->len = page_vec->data_size;
iser_dbg("PHYSICAL Mem.register, [PHYS p_array: 0x%p, sz: %d, "
"entry[0]: (0x%08lx,%ld)] -> "
"[lkey: 0x%08X mem_h: 0x%p va: 0x%08lX sz: %ld]\n",
page_vec, page_vec->length,
(unsigned long)page_vec->pages[0],
(unsigned long)page_vec->data_size,
(unsigned int)mem_reg->lkey, mem_reg->mem_h,
(unsigned long)mem_reg->va, (unsigned long)mem_reg->len);
return 0;
}
/**
* Unregister (previously registered using FMR) memory.
* If memory is non-FMR does nothing.
*/
void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir)
{
struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
int ret;
if (!reg->mem_h)
return;
iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n",reg->mem_h);
ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
if (ret)
iser_err("ib_fmr_pool_unmap failed %d\n", ret);
reg->mem_h = NULL;
}
void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir)
{
struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
struct iser_conn *iser_conn = iser_task->iser_conn;
struct ib_conn *ib_conn = &iser_conn->ib_conn;
struct fast_reg_descriptor *desc = reg->mem_h;
if (!desc)
return;
reg->mem_h = NULL;
spin_lock_bh(&ib_conn->lock);
list_add_tail(&desc->list, &ib_conn->fastreg.pool);
spin_unlock_bh(&ib_conn->lock);
}
int iser_post_recvl(struct iser_conn *iser_conn)
{
struct ib_recv_wr rx_wr, *rx_wr_failed;
......@@ -1210,6 +1131,9 @@ iser_handle_comp_error(struct ib_conn *ib_conn,
iscsi_conn_failure(iser_conn->iscsi_conn,
ISCSI_ERR_CONN_FAILED);
if (wc->wr_id == ISER_FASTREG_LI_WRID)
return;
if (is_iser_tx_desc(iser_conn, wr_id)) {
struct iser_tx_desc *desc = wr_id;
......@@ -1254,13 +1178,11 @@ static void iser_handle_wc(struct ib_wc *wc)
else
iser_dbg("flush error: wr id %llx\n", wc->wr_id);
if (wc->wr_id != ISER_FASTREG_LI_WRID &&
wc->wr_id != ISER_BEACON_WRID)
iser_handle_comp_error(ib_conn, wc);
/* complete in case all flush errors were consumed */
if (wc->wr_id == ISER_BEACON_WRID)
/* all flush errors were consumed */
complete(&ib_conn->flush_comp);
else
iser_handle_comp_error(ib_conn, wc);
}
}
......@@ -1306,7 +1228,7 @@ static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir, sector_t *sector)
{
struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
struct iser_mem_reg *reg = &iser_task->rdma_reg[cmd_dir];
struct fast_reg_descriptor *desc = reg->mem_h;
unsigned long sector_size = iser_task->sc->device->sector_size;
struct ib_mr_status mr_status;
......
......@@ -40,6 +40,7 @@
#include <linux/parser.h>
#include <linux/random.h>
#include <linux/jiffies.h>
#include <rdma/ib_cache.h>
#include <linux/atomic.h>
......@@ -265,10 +266,10 @@ static int srp_init_qp(struct srp_target_port *target,
if (!attr)
return -ENOMEM;
ret = ib_find_pkey(target->srp_host->srp_dev->dev,
target->srp_host->port,
be16_to_cpu(target->pkey),
&attr->pkey_index);
ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
target->srp_host->port,
be16_to_cpu(target->pkey),
&attr->pkey_index);
if (ret)
goto out;
......
......@@ -939,21 +939,34 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave,
return err;
}
if (smp->attr_id == IB_SMP_ATTR_GUID_INFO) {
/* compute slave's gid block */
smp->attr_mod = cpu_to_be32(slave / 8);
/* execute cmd */
err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
vhcr->in_modifier, opcode_modifier,
vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE);
if (!err) {
/* if needed, move slave gid to index 0 */
if (slave % 8)
memcpy(outsmp->data,
outsmp->data + (slave % 8) * 8, 8);
/* delete all other gids */
memset(outsmp->data + 8, 0, 56);
__be64 guid = mlx4_get_admin_guid(dev, slave,
port);
/* set the PF admin guid to the FW/HW burned
* GUID, if it wasn't yet set
*/
if (slave == 0 && guid == 0) {
smp->attr_mod = 0;
err = mlx4_cmd_box(dev,
inbox->dma,
outbox->dma,
vhcr->in_modifier,
opcode_modifier,
vhcr->op,
MLX4_CMD_TIME_CLASS_C,
MLX4_CMD_NATIVE);
if (err)
return err;
mlx4_set_admin_guid(dev,
*(__be64 *)outsmp->
data, slave, port);
} else {
memcpy(outsmp->data, &guid, 8);
}
return err;
/* clean all other gids */
memset(outsmp->data + 8, 0, 56);
return 0;
}
if (smp->attr_id == IB_SMP_ATTR_NODE_INFO) {
err = mlx4_cmd_box(dev, inbox->dma, outbox->dma,
......@@ -2350,6 +2363,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev)
oper_vport->qos_vport = MLX4_VPP_DEFAULT_VPORT;
vf_oper->vport[port].vlan_idx = NO_INDX;
vf_oper->vport[port].mac_idx = NO_INDX;
mlx4_set_random_admin_guid(dev, i, port);
}
spin_lock_init(&s_state->lock);
}
......
......@@ -702,6 +702,8 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq)
priv->mfunc.master.slave_state[flr_slave].is_slave_going_down = 1;
}
spin_unlock_irqrestore(&priv->mfunc.master.slave_state_lock, flags);
mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_SHUTDOWN,
flr_slave);
queue_work(priv->mfunc.master.comm_wq,
&priv->mfunc.master.slave_flr_event_work);
break;
......
......@@ -2260,6 +2260,37 @@ void mlx4_counter_free(struct mlx4_dev *dev, u32 idx)
}
EXPORT_SYMBOL_GPL(mlx4_counter_free);
/**
 * mlx4_set_admin_guid - record the admin GUID of a function's port
 * @dev:   mlx4 device
 * @guid:  GUID to store (big-endian)
 * @entry: index into the master's vf_admin[] table
 * @port:  index into that entry's vport[] table
 *
 * Writes @guid into vf_admin[@entry].vport[@port].guid. Neither index is
 * range-checked here.
 */
void mlx4_set_admin_guid(struct mlx4_dev *dev, __be64 guid, int entry, int port)
{
	__be64 *slot =
		&mlx4_priv(dev)->mfunc.master.vf_admin[entry].vport[port].guid;

	*slot = guid;
}
EXPORT_SYMBOL_GPL(mlx4_set_admin_guid);
__be64 mlx4_get_admin_guid(struct mlx4_dev *dev, int entry, int port)
{
struct mlx4_priv *priv = mlx4_priv(dev);
return priv->mfunc.master.vf_admin[entry].vport[port].guid;
}
EXPORT_SYMBOL_GPL(mlx4_get_admin_guid);
/**
 * mlx4_set_random_admin_guid - assign a random admin GUID to a function's port
 * @dev:   mlx4 device
 * @entry: index into the master's vf_admin[] table
 * @port:  index into that entry's vport[] table
 *
 * Entry 0 is left untouched (it uses the hw GUID). For every other entry a
 * random 64-bit value is generated, bit 56 is cleared and bit 57 is set
 * (both taken in the big-endian representation), and the result is stored
 * in vf_admin[@entry].vport[@port].guid.
 * NOTE(review): the two bits presumably mark the GUID as a non-group,
 * locally administered identifier - confirm against the EUI-64 rules.
 */
void mlx4_set_random_admin_guid(struct mlx4_dev *dev, int entry, int port)
{
	struct mlx4_priv *priv = mlx4_priv(dev);
	__be64 rnd_guid;

	if (entry != 0) {
		get_random_bytes((char *)&rnd_guid, sizeof(rnd_guid));
		rnd_guid = (rnd_guid & ~(cpu_to_be64(1ULL << 56))) |
			   cpu_to_be64(1ULL << 57);
		priv->mfunc.master.vf_admin[entry].vport[port].guid = rnd_guid;
	}
}
static int mlx4_setup_hca(struct mlx4_dev *dev)
{
struct mlx4_priv *priv = mlx4_priv(dev);
......
......@@ -499,6 +499,7 @@ struct mlx4_vport_state {
bool spoofchk;
u32 link_state;
u8 qos_vport;
__be64 guid;
};
struct mlx4_vf_admin_state {
......
......@@ -211,26 +211,28 @@ static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr)
return 0;
}
#define MLX5_U64_4K_PAGE_MASK ((~(u64)0U) << PAGE_SHIFT)
static void free_4k(struct mlx5_core_dev *dev, u64 addr)
{
struct fw_page *fwp;
int n;
fwp = find_fw_page(dev, addr & PAGE_MASK);
fwp = find_fw_page(dev, addr & MLX5_U64_4K_PAGE_MASK);
if (!fwp) {
mlx5_core_warn(dev, "page not found\n");
return;
}
n = (addr & ~PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT;
n = (addr & ~MLX5_U64_4K_PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT;
fwp->free_count++;
set_bit(n, &fwp->bitmask);
if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) {
rb_erase(&fwp->rb_node, &dev->priv.page_root);
if (fwp->free_count != 1)
list_del(&fwp->list);
dma_unmap_page(&dev->pdev->dev, addr & PAGE_MASK, PAGE_SIZE,
DMA_BIDIRECTIONAL);
dma_unmap_page(&dev->pdev->dev, addr & MLX5_U64_4K_PAGE_MASK,
PAGE_SIZE, DMA_BIDIRECTIONAL);
__free_page(fwp->page);
kfree(fwp);
} else if (fwp->free_count == 1) {
......
......@@ -1345,6 +1345,10 @@ int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port);
int mlx4_counter_alloc(struct mlx4_dev *dev, u32 *idx);
void mlx4_counter_free(struct mlx4_dev *dev, u32 idx);
void mlx4_set_admin_guid(struct mlx4_dev *dev, __be64 guid, int entry,
int port);
__be64 mlx4_get_admin_guid(struct mlx4_dev *dev, int entry, int port);
void mlx4_set_random_admin_guid(struct mlx4_dev *dev, int entry, int port);
int mlx4_flow_attach(struct mlx4_dev *dev,
struct mlx4_net_trans_rule *rule, u64 *reg_id);
int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册