Commit 5a33a669 — Author: Alex Tabachnik, Committer: Roland Dreier

IB/iser: Add more RX CQs to scale out processing of SCSI responses

RX/TX CQs will now be selected from a per HCA pool.  For the RX flow
this has the effect of using different interrupt vectors when using
low level drivers (such as mlx4) that map the "vector" param provided
by the ULP on CQ creation to a dedicated IRQ/MSI-X vector.  This
allows the RX flow processing of IO responses to be distributed across
multiple CPUs.

QPs (--> iSER sessions) are assigned to CQs in round robin order using
the CQ with the minimum number of sessions attached to it.
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Alex Tabachnik <alext@mellanox.com>
Signed-off-by: Roland Dreier <roland@purestorage.com>
Parent commit: 7a9a2970
...@@ -177,6 +177,7 @@ struct iser_data_buf { ...@@ -177,6 +177,7 @@ struct iser_data_buf {
/* fwd declarations */ /* fwd declarations */
struct iser_device; struct iser_device;
struct iser_cq_desc;
struct iscsi_iser_conn; struct iscsi_iser_conn;
struct iscsi_iser_task; struct iscsi_iser_task;
struct iscsi_endpoint; struct iscsi_endpoint;
...@@ -226,16 +227,21 @@ struct iser_rx_desc { ...@@ -226,16 +227,21 @@ struct iser_rx_desc {
char pad[ISER_RX_PAD_SIZE]; char pad[ISER_RX_PAD_SIZE];
} __attribute__((packed)); } __attribute__((packed));
#define ISER_MAX_CQ 4
struct iser_device { struct iser_device {
struct ib_device *ib_device; struct ib_device *ib_device;
struct ib_pd *pd; struct ib_pd *pd;
struct ib_cq *rx_cq; struct ib_cq *rx_cq[ISER_MAX_CQ];
struct ib_cq *tx_cq; struct ib_cq *tx_cq[ISER_MAX_CQ];
struct ib_mr *mr; struct ib_mr *mr;
struct tasklet_struct cq_tasklet; struct tasklet_struct cq_tasklet[ISER_MAX_CQ];
struct ib_event_handler event_handler; struct ib_event_handler event_handler;
struct list_head ig_list; /* entry in ig devices list */ struct list_head ig_list; /* entry in ig devices list */
int refcount; int refcount;
int cq_active_qps[ISER_MAX_CQ];
int cqs_used;
struct iser_cq_desc *cq_desc;
}; };
struct iser_conn { struct iser_conn {
...@@ -287,6 +293,11 @@ struct iser_page_vec { ...@@ -287,6 +293,11 @@ struct iser_page_vec {
int data_size; int data_size;
}; };
struct iser_cq_desc {
struct iser_device *device;
int cq_index;
};
struct iser_global { struct iser_global {
struct mutex device_list_mutex;/* */ struct mutex device_list_mutex;/* */
struct list_head device_list; /* all iSER devices */ struct list_head device_list; /* all iSER devices */
......
...@@ -70,32 +70,50 @@ static void iser_event_handler(struct ib_event_handler *handler, ...@@ -70,32 +70,50 @@ static void iser_event_handler(struct ib_event_handler *handler,
*/ */
static int iser_create_device_ib_res(struct iser_device *device) static int iser_create_device_ib_res(struct iser_device *device)
{ {
int i, j;
struct iser_cq_desc *cq_desc;
device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors);
iser_err("using %d CQs, device %s supports %d vectors\n", device->cqs_used,
device->ib_device->name, device->ib_device->num_comp_vectors);
device->cq_desc = kmalloc(sizeof(struct iser_cq_desc) * device->cqs_used,
GFP_KERNEL);
if (device->cq_desc == NULL)
goto cq_desc_err;
cq_desc = device->cq_desc;
device->pd = ib_alloc_pd(device->ib_device); device->pd = ib_alloc_pd(device->ib_device);
if (IS_ERR(device->pd)) if (IS_ERR(device->pd))
goto pd_err; goto pd_err;
device->rx_cq = ib_create_cq(device->ib_device, for (i = 0; i < device->cqs_used; i++) {
iser_cq_callback, cq_desc[i].device = device;
iser_cq_event_callback, cq_desc[i].cq_index = i;
(void *)device,
ISER_MAX_RX_CQ_LEN, 0); device->rx_cq[i] = ib_create_cq(device->ib_device,
if (IS_ERR(device->rx_cq)) iser_cq_callback,
goto rx_cq_err; iser_cq_event_callback,
(void *)&cq_desc[i],
ISER_MAX_RX_CQ_LEN, i);
if (IS_ERR(device->rx_cq[i]))
goto cq_err;
device->tx_cq = ib_create_cq(device->ib_device, device->tx_cq[i] = ib_create_cq(device->ib_device,
NULL, iser_cq_event_callback, NULL, iser_cq_event_callback,
(void *)device, (void *)&cq_desc[i],
ISER_MAX_TX_CQ_LEN, 0); ISER_MAX_TX_CQ_LEN, i);
if (IS_ERR(device->tx_cq)) if (IS_ERR(device->tx_cq[i]))
goto tx_cq_err; goto cq_err;
if (ib_req_notify_cq(device->rx_cq, IB_CQ_NEXT_COMP)) if (ib_req_notify_cq(device->rx_cq[i], IB_CQ_NEXT_COMP))
goto cq_arm_err; goto cq_err;
tasklet_init(&device->cq_tasklet, tasklet_init(&device->cq_tasklet[i],
iser_cq_tasklet_fn, iser_cq_tasklet_fn,
(unsigned long)device); (unsigned long)&cq_desc[i]);
}
device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE | device->mr = ib_get_dma_mr(device->pd, IB_ACCESS_LOCAL_WRITE |
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_WRITE |
...@@ -113,14 +131,19 @@ static int iser_create_device_ib_res(struct iser_device *device) ...@@ -113,14 +131,19 @@ static int iser_create_device_ib_res(struct iser_device *device)
handler_err: handler_err:
ib_dereg_mr(device->mr); ib_dereg_mr(device->mr);
dma_mr_err: dma_mr_err:
tasklet_kill(&device->cq_tasklet); for (j = 0; j < device->cqs_used; j++)
cq_arm_err: tasklet_kill(&device->cq_tasklet[j]);
ib_destroy_cq(device->tx_cq); cq_err:
tx_cq_err: for (j = 0; j < i; j++) {
ib_destroy_cq(device->rx_cq); if (device->tx_cq[j])
rx_cq_err: ib_destroy_cq(device->tx_cq[j]);
if (device->rx_cq[j])
ib_destroy_cq(device->rx_cq[j]);
}
ib_dealloc_pd(device->pd); ib_dealloc_pd(device->pd);
pd_err: pd_err:
kfree(device->cq_desc);
cq_desc_err:
iser_err("failed to allocate an IB resource\n"); iser_err("failed to allocate an IB resource\n");
return -1; return -1;
} }
...@@ -131,18 +154,24 @@ static int iser_create_device_ib_res(struct iser_device *device) ...@@ -131,18 +154,24 @@ static int iser_create_device_ib_res(struct iser_device *device)
*/ */
static void iser_free_device_ib_res(struct iser_device *device) static void iser_free_device_ib_res(struct iser_device *device)
{ {
int i;
BUG_ON(device->mr == NULL); BUG_ON(device->mr == NULL);
tasklet_kill(&device->cq_tasklet); for (i = 0; i < device->cqs_used; i++) {
tasklet_kill(&device->cq_tasklet[i]);
(void)ib_destroy_cq(device->tx_cq[i]);
(void)ib_destroy_cq(device->rx_cq[i]);
device->tx_cq[i] = NULL;
device->rx_cq[i] = NULL;
}
(void)ib_unregister_event_handler(&device->event_handler); (void)ib_unregister_event_handler(&device->event_handler);
(void)ib_dereg_mr(device->mr); (void)ib_dereg_mr(device->mr);
(void)ib_destroy_cq(device->tx_cq);
(void)ib_destroy_cq(device->rx_cq);
(void)ib_dealloc_pd(device->pd); (void)ib_dealloc_pd(device->pd);
kfree(device->cq_desc);
device->mr = NULL; device->mr = NULL;
device->tx_cq = NULL;
device->rx_cq = NULL;
device->pd = NULL; device->pd = NULL;
} }
...@@ -157,6 +186,7 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn) ...@@ -157,6 +186,7 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
struct ib_qp_init_attr init_attr; struct ib_qp_init_attr init_attr;
int req_err, resp_err, ret = -ENOMEM; int req_err, resp_err, ret = -ENOMEM;
struct ib_fmr_pool_param params; struct ib_fmr_pool_param params;
int index, min_index = 0;
BUG_ON(ib_conn->device == NULL); BUG_ON(ib_conn->device == NULL);
...@@ -220,10 +250,20 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn) ...@@ -220,10 +250,20 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
memset(&init_attr, 0, sizeof init_attr); memset(&init_attr, 0, sizeof init_attr);
mutex_lock(&ig.connlist_mutex);
/* select the CQ with the minimal number of usages */
for (index = 0; index < device->cqs_used; index++)
if (device->cq_active_qps[index] <
device->cq_active_qps[min_index])
min_index = index;
device->cq_active_qps[min_index]++;
mutex_unlock(&ig.connlist_mutex);
iser_err("cq index %d used for ib_conn %p\n", min_index, ib_conn);
init_attr.event_handler = iser_qp_event_callback; init_attr.event_handler = iser_qp_event_callback;
init_attr.qp_context = (void *)ib_conn; init_attr.qp_context = (void *)ib_conn;
init_attr.send_cq = device->tx_cq; init_attr.send_cq = device->tx_cq[min_index];
init_attr.recv_cq = device->rx_cq; init_attr.recv_cq = device->rx_cq[min_index];
init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS; init_attr.cap.max_send_wr = ISER_QP_MAX_REQ_DTOS;
init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS; init_attr.cap.max_recv_wr = ISER_QP_MAX_RECV_DTOS;
init_attr.cap.max_send_sge = 2; init_attr.cap.max_send_sge = 2;
...@@ -252,6 +292,7 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn) ...@@ -252,6 +292,7 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
*/ */
static int iser_free_ib_conn_res(struct iser_conn *ib_conn, int can_destroy_id) static int iser_free_ib_conn_res(struct iser_conn *ib_conn, int can_destroy_id)
{ {
int cq_index;
BUG_ON(ib_conn == NULL); BUG_ON(ib_conn == NULL);
iser_err("freeing conn %p cma_id %p fmr pool %p qp %p\n", iser_err("freeing conn %p cma_id %p fmr pool %p qp %p\n",
...@@ -262,9 +303,12 @@ static int iser_free_ib_conn_res(struct iser_conn *ib_conn, int can_destroy_id) ...@@ -262,9 +303,12 @@ static int iser_free_ib_conn_res(struct iser_conn *ib_conn, int can_destroy_id)
if (ib_conn->fmr_pool != NULL) if (ib_conn->fmr_pool != NULL)
ib_destroy_fmr_pool(ib_conn->fmr_pool); ib_destroy_fmr_pool(ib_conn->fmr_pool);
if (ib_conn->qp != NULL) if (ib_conn->qp != NULL) {
rdma_destroy_qp(ib_conn->cma_id); cq_index = ((struct iser_cq_desc *)ib_conn->qp->recv_cq->cq_context)->cq_index;
ib_conn->device->cq_active_qps[cq_index]--;
rdma_destroy_qp(ib_conn->cma_id);
}
/* if cma handler context, the caller acts s.t the cma destroy the id */ /* if cma handler context, the caller acts s.t the cma destroy the id */
if (ib_conn->cma_id != NULL && can_destroy_id) if (ib_conn->cma_id != NULL && can_destroy_id)
rdma_destroy_id(ib_conn->cma_id); rdma_destroy_id(ib_conn->cma_id);
...@@ -791,9 +835,9 @@ static void iser_handle_comp_error(struct iser_tx_desc *desc, ...@@ -791,9 +835,9 @@ static void iser_handle_comp_error(struct iser_tx_desc *desc,
} }
} }
static int iser_drain_tx_cq(struct iser_device *device) static int iser_drain_tx_cq(struct iser_device *device, int cq_index)
{ {
struct ib_cq *cq = device->tx_cq; struct ib_cq *cq = device->tx_cq[cq_index];
struct ib_wc wc; struct ib_wc wc;
struct iser_tx_desc *tx_desc; struct iser_tx_desc *tx_desc;
struct iser_conn *ib_conn; struct iser_conn *ib_conn;
...@@ -822,8 +866,10 @@ static int iser_drain_tx_cq(struct iser_device *device) ...@@ -822,8 +866,10 @@ static int iser_drain_tx_cq(struct iser_device *device)
static void iser_cq_tasklet_fn(unsigned long data) static void iser_cq_tasklet_fn(unsigned long data)
{ {
struct iser_device *device = (struct iser_device *)data; struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)data;
struct ib_cq *cq = device->rx_cq; struct iser_device *device = cq_desc->device;
int cq_index = cq_desc->cq_index;
struct ib_cq *cq = device->rx_cq[cq_index];
struct ib_wc wc; struct ib_wc wc;
struct iser_rx_desc *desc; struct iser_rx_desc *desc;
unsigned long xfer_len; unsigned long xfer_len;
...@@ -851,19 +897,21 @@ static void iser_cq_tasklet_fn(unsigned long data) ...@@ -851,19 +897,21 @@ static void iser_cq_tasklet_fn(unsigned long data)
} }
completed_rx++; completed_rx++;
if (!(completed_rx & 63)) if (!(completed_rx & 63))
completed_tx += iser_drain_tx_cq(device); completed_tx += iser_drain_tx_cq(device, cq_index);
} }
/* #warning "it is assumed here that arming CQ only once its empty" * /* #warning "it is assumed here that arming CQ only once its empty" *
* " would not cause interrupts to be missed" */ * " would not cause interrupts to be missed" */
ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
completed_tx += iser_drain_tx_cq(device); completed_tx += iser_drain_tx_cq(device, cq_index);
iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx); iser_dbg("got %d rx %d tx completions\n", completed_rx, completed_tx);
} }
static void iser_cq_callback(struct ib_cq *cq, void *cq_context) static void iser_cq_callback(struct ib_cq *cq, void *cq_context)
{ {
struct iser_device *device = (struct iser_device *)cq_context; struct iser_cq_desc *cq_desc = (struct iser_cq_desc *)cq_context;
struct iser_device *device = cq_desc->device;
int cq_index = cq_desc->cq_index;
tasklet_schedule(&device->cq_tasklet); tasklet_schedule(&device->cq_tasklet[cq_index]);
} }
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册