提交 5cfb1782 编写于 作者: B Bart Van Assche 提交者: Roland Dreier

IB/srp: Add fast registration support

Certain HCA types (e.g. Connect-IB) and certain configurations (e.g.
ConnectX VF) support fast registration but not FMR. Hence add fast
registration support.

In function srp_rport_reconnect(), move the the srp_finish_req()
loop from after to before the srp_create_target_ib() call. This is
needed to avoid that srp_finish_req() tries to queue any
invalidation requests for rkeys associated with the old queue pair
on the newly allocated queue pair. Invoking srp_finish_req() before
the queue pair has been reallocated is safe since srp_claim_req()
handles completions correctly that arrive after srp_finish_req()
has been invoked.
Signed-off-by: NBart Van Assche <bvanassche@acm.org>
Signed-off-by: NRoland Dreier <roland@purestorage.com>
上级 52ede08f
...@@ -66,6 +66,7 @@ static unsigned int srp_sg_tablesize; ...@@ -66,6 +66,7 @@ static unsigned int srp_sg_tablesize;
static unsigned int cmd_sg_entries; static unsigned int cmd_sg_entries;
static unsigned int indirect_sg_entries; static unsigned int indirect_sg_entries;
static bool allow_ext_sg; static bool allow_ext_sg;
static bool prefer_fr;
static bool register_always; static bool register_always;
static int topspin_workarounds = 1; static int topspin_workarounds = 1;
...@@ -88,6 +89,10 @@ module_param(topspin_workarounds, int, 0444); ...@@ -88,6 +89,10 @@ module_param(topspin_workarounds, int, 0444);
MODULE_PARM_DESC(topspin_workarounds, MODULE_PARM_DESC(topspin_workarounds,
"Enable workarounds for Topspin/Cisco SRP target bugs if != 0"); "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
module_param(prefer_fr, bool, 0444);
MODULE_PARM_DESC(prefer_fr,
"Whether to use fast registration if both FMR and fast registration are supported");
module_param(register_always, bool, 0444); module_param(register_always, bool, 0444);
MODULE_PARM_DESC(register_always, MODULE_PARM_DESC(register_always,
"Use memory registration even for contiguous memory regions"); "Use memory registration even for contiguous memory regions");
...@@ -311,6 +316,132 @@ static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target) ...@@ -311,6 +316,132 @@ static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
return ib_create_fmr_pool(dev->pd, &fmr_param); return ib_create_fmr_pool(dev->pd, &fmr_param);
} }
/**
* srp_destroy_fr_pool() - free the resources owned by a pool
* @pool: Fast registration pool to be destroyed.
*/
static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
{
int i;
struct srp_fr_desc *d;
if (!pool)
return;
for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
if (d->frpl)
ib_free_fast_reg_page_list(d->frpl);
if (d->mr)
ib_dereg_mr(d->mr);
}
kfree(pool);
}
/**
* srp_create_fr_pool() - allocate and initialize a pool for fast registration
* @device: IB device to allocate fast registration descriptors for.
* @pd: Protection domain associated with the FR descriptors.
* @pool_size: Number of descriptors to allocate.
* @max_page_list_len: Maximum fast registration work request page list length.
*/
static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
struct ib_pd *pd, int pool_size,
int max_page_list_len)
{
struct srp_fr_pool *pool;
struct srp_fr_desc *d;
struct ib_mr *mr;
struct ib_fast_reg_page_list *frpl;
int i, ret = -EINVAL;
if (pool_size <= 0)
goto err;
ret = -ENOMEM;
pool = kzalloc(sizeof(struct srp_fr_pool) +
pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
if (!pool)
goto err;
pool->size = pool_size;
pool->max_page_list_len = max_page_list_len;
spin_lock_init(&pool->lock);
INIT_LIST_HEAD(&pool->free_list);
for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
mr = ib_alloc_fast_reg_mr(pd, max_page_list_len);
if (IS_ERR(mr)) {
ret = PTR_ERR(mr);
goto destroy_pool;
}
d->mr = mr;
frpl = ib_alloc_fast_reg_page_list(device, max_page_list_len);
if (IS_ERR(frpl)) {
ret = PTR_ERR(frpl);
goto destroy_pool;
}
d->frpl = frpl;
list_add_tail(&d->entry, &pool->free_list);
}
out:
return pool;
destroy_pool:
srp_destroy_fr_pool(pool);
err:
pool = ERR_PTR(ret);
goto out;
}
/**
* srp_fr_pool_get() - obtain a descriptor suitable for fast registration
* @pool: Pool to obtain descriptor from.
*/
static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
{
struct srp_fr_desc *d = NULL;
unsigned long flags;
spin_lock_irqsave(&pool->lock, flags);
if (!list_empty(&pool->free_list)) {
d = list_first_entry(&pool->free_list, typeof(*d), entry);
list_del(&d->entry);
}
spin_unlock_irqrestore(&pool->lock, flags);
return d;
}
/**
* srp_fr_pool_put() - put an FR descriptor back in the free list
* @pool: Pool the descriptor was allocated from.
* @desc: Pointer to an array of fast registration descriptor pointers.
* @n: Number of descriptors to put back.
*
* Note: The caller must already have queued an invalidation request for
* desc->mr->rkey before calling this function.
*/
static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
int n)
{
unsigned long flags;
int i;
spin_lock_irqsave(&pool->lock, flags);
for (i = 0; i < n; i++)
list_add(&desc[i]->entry, &pool->free_list);
spin_unlock_irqrestore(&pool->lock, flags);
}
static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
{
struct srp_device *dev = target->srp_host->srp_dev;
return srp_create_fr_pool(dev->dev, dev->pd,
target->scsi_host->can_queue,
dev->max_pages_per_mr);
}
static int srp_create_target_ib(struct srp_target_port *target) static int srp_create_target_ib(struct srp_target_port *target)
{ {
struct srp_device *dev = target->srp_host->srp_dev; struct srp_device *dev = target->srp_host->srp_dev;
...@@ -318,6 +449,8 @@ static int srp_create_target_ib(struct srp_target_port *target) ...@@ -318,6 +449,8 @@ static int srp_create_target_ib(struct srp_target_port *target)
struct ib_cq *recv_cq, *send_cq; struct ib_cq *recv_cq, *send_cq;
struct ib_qp *qp; struct ib_qp *qp;
struct ib_fmr_pool *fmr_pool = NULL; struct ib_fmr_pool *fmr_pool = NULL;
struct srp_fr_pool *fr_pool = NULL;
const int m = 1 + dev->use_fast_reg;
int ret; int ret;
init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL); init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
...@@ -332,7 +465,7 @@ static int srp_create_target_ib(struct srp_target_port *target) ...@@ -332,7 +465,7 @@ static int srp_create_target_ib(struct srp_target_port *target)
} }
send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, target, send_cq = ib_create_cq(dev->dev, srp_send_completion, NULL, target,
target->queue_size, target->comp_vector); m * target->queue_size, target->comp_vector);
if (IS_ERR(send_cq)) { if (IS_ERR(send_cq)) {
ret = PTR_ERR(send_cq); ret = PTR_ERR(send_cq);
goto err_recv_cq; goto err_recv_cq;
...@@ -341,11 +474,11 @@ static int srp_create_target_ib(struct srp_target_port *target) ...@@ -341,11 +474,11 @@ static int srp_create_target_ib(struct srp_target_port *target)
ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP); ib_req_notify_cq(recv_cq, IB_CQ_NEXT_COMP);
init_attr->event_handler = srp_qp_event; init_attr->event_handler = srp_qp_event;
init_attr->cap.max_send_wr = target->queue_size; init_attr->cap.max_send_wr = m * target->queue_size;
init_attr->cap.max_recv_wr = target->queue_size; init_attr->cap.max_recv_wr = target->queue_size;
init_attr->cap.max_recv_sge = 1; init_attr->cap.max_recv_sge = 1;
init_attr->cap.max_send_sge = 1; init_attr->cap.max_send_sge = 1;
init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
init_attr->qp_type = IB_QPT_RC; init_attr->qp_type = IB_QPT_RC;
init_attr->send_cq = send_cq; init_attr->send_cq = send_cq;
init_attr->recv_cq = recv_cq; init_attr->recv_cq = recv_cq;
...@@ -360,7 +493,18 @@ static int srp_create_target_ib(struct srp_target_port *target) ...@@ -360,7 +493,18 @@ static int srp_create_target_ib(struct srp_target_port *target)
if (ret) if (ret)
goto err_qp; goto err_qp;
if (dev->has_fmr) { if (dev->use_fast_reg && dev->has_fr) {
fr_pool = srp_alloc_fr_pool(target);
if (IS_ERR(fr_pool)) {
ret = PTR_ERR(fr_pool);
shost_printk(KERN_WARNING, target->scsi_host, PFX
"FR pool allocation failed (%d)\n", ret);
goto err_qp;
}
if (target->fr_pool)
srp_destroy_fr_pool(target->fr_pool);
target->fr_pool = fr_pool;
} else if (!dev->use_fast_reg && dev->has_fmr) {
fmr_pool = srp_alloc_fmr_pool(target); fmr_pool = srp_alloc_fmr_pool(target);
if (IS_ERR(fmr_pool)) { if (IS_ERR(fmr_pool)) {
ret = PTR_ERR(fmr_pool); ret = PTR_ERR(fmr_pool);
...@@ -407,10 +551,16 @@ static int srp_create_target_ib(struct srp_target_port *target) ...@@ -407,10 +551,16 @@ static int srp_create_target_ib(struct srp_target_port *target)
*/ */
static void srp_free_target_ib(struct srp_target_port *target) static void srp_free_target_ib(struct srp_target_port *target)
{ {
struct srp_device *dev = target->srp_host->srp_dev;
int i; int i;
if (dev->use_fast_reg) {
if (target->fr_pool)
srp_destroy_fr_pool(target->fr_pool);
} else {
if (target->fmr_pool) if (target->fmr_pool)
ib_destroy_fmr_pool(target->fmr_pool); ib_destroy_fmr_pool(target->fmr_pool);
}
ib_destroy_qp(target->qp); ib_destroy_qp(target->qp);
ib_destroy_cq(target->send_cq); ib_destroy_cq(target->send_cq);
ib_destroy_cq(target->recv_cq); ib_destroy_cq(target->recv_cq);
...@@ -615,7 +765,8 @@ static void srp_disconnect_target(struct srp_target_port *target) ...@@ -615,7 +765,8 @@ static void srp_disconnect_target(struct srp_target_port *target)
static void srp_free_req_data(struct srp_target_port *target) static void srp_free_req_data(struct srp_target_port *target)
{ {
struct ib_device *ibdev = target->srp_host->srp_dev->dev; struct srp_device *dev = target->srp_host->srp_dev;
struct ib_device *ibdev = dev->dev;
struct srp_request *req; struct srp_request *req;
int i; int i;
...@@ -624,6 +775,9 @@ static void srp_free_req_data(struct srp_target_port *target) ...@@ -624,6 +775,9 @@ static void srp_free_req_data(struct srp_target_port *target)
for (i = 0; i < target->req_ring_size; ++i) { for (i = 0; i < target->req_ring_size; ++i) {
req = &target->req_ring[i]; req = &target->req_ring[i];
if (dev->use_fast_reg)
kfree(req->fr_list);
else
kfree(req->fmr_list); kfree(req->fmr_list);
kfree(req->map_page); kfree(req->map_page);
if (req->indirect_dma_addr) { if (req->indirect_dma_addr) {
...@@ -643,6 +797,7 @@ static int srp_alloc_req_data(struct srp_target_port *target) ...@@ -643,6 +797,7 @@ static int srp_alloc_req_data(struct srp_target_port *target)
struct srp_device *srp_dev = target->srp_host->srp_dev; struct srp_device *srp_dev = target->srp_host->srp_dev;
struct ib_device *ibdev = srp_dev->dev; struct ib_device *ibdev = srp_dev->dev;
struct srp_request *req; struct srp_request *req;
void *mr_list;
dma_addr_t dma_addr; dma_addr_t dma_addr;
int i, ret = -ENOMEM; int i, ret = -ENOMEM;
...@@ -655,12 +810,20 @@ static int srp_alloc_req_data(struct srp_target_port *target) ...@@ -655,12 +810,20 @@ static int srp_alloc_req_data(struct srp_target_port *target)
for (i = 0; i < target->req_ring_size; ++i) { for (i = 0; i < target->req_ring_size; ++i) {
req = &target->req_ring[i]; req = &target->req_ring[i];
req->fmr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *), mr_list = kmalloc(target->cmd_sg_cnt * sizeof(void *),
GFP_KERNEL); GFP_KERNEL);
if (!mr_list)
goto out;
if (srp_dev->use_fast_reg)
req->fr_list = mr_list;
else
req->fmr_list = mr_list;
req->map_page = kmalloc(srp_dev->max_pages_per_mr * req->map_page = kmalloc(srp_dev->max_pages_per_mr *
sizeof(void *), GFP_KERNEL); sizeof(void *), GFP_KERNEL);
if (!req->map_page)
goto out;
req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL); req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
if (!req->fmr_list || !req->map_page || !req->indirect_desc) if (!req->indirect_desc)
goto out; goto out;
dma_addr = ib_dma_map_single(ibdev, req->indirect_desc, dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
...@@ -797,21 +960,56 @@ static int srp_connect_target(struct srp_target_port *target) ...@@ -797,21 +960,56 @@ static int srp_connect_target(struct srp_target_port *target)
} }
} }
static int srp_inv_rkey(struct srp_target_port *target, u32 rkey)
{
struct ib_send_wr *bad_wr;
struct ib_send_wr wr = {
.opcode = IB_WR_LOCAL_INV,
.wr_id = LOCAL_INV_WR_ID_MASK,
.next = NULL,
.num_sge = 0,
.send_flags = 0,
.ex.invalidate_rkey = rkey,
};
return ib_post_send(target->qp, &wr, &bad_wr);
}
static void srp_unmap_data(struct scsi_cmnd *scmnd, static void srp_unmap_data(struct scsi_cmnd *scmnd,
struct srp_target_port *target, struct srp_target_port *target,
struct srp_request *req) struct srp_request *req)
{ {
struct ib_device *ibdev = target->srp_host->srp_dev->dev; struct srp_device *dev = target->srp_host->srp_dev;
struct ib_pool_fmr **pfmr; struct ib_device *ibdev = dev->dev;
int i, res;
if (!scsi_sglist(scmnd) || if (!scsi_sglist(scmnd) ||
(scmnd->sc_data_direction != DMA_TO_DEVICE && (scmnd->sc_data_direction != DMA_TO_DEVICE &&
scmnd->sc_data_direction != DMA_FROM_DEVICE)) scmnd->sc_data_direction != DMA_FROM_DEVICE))
return; return;
pfmr = req->fmr_list; if (dev->use_fast_reg) {
while (req->nmdesc--) struct srp_fr_desc **pfr;
ib_fmr_pool_unmap(*pfmr++);
for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
res = srp_inv_rkey(target, (*pfr)->mr->rkey);
if (res < 0) {
shost_printk(KERN_ERR, target->scsi_host, PFX
"Queueing INV WR for rkey %#x failed (%d)\n",
(*pfr)->mr->rkey, res);
queue_work(system_long_wq,
&target->tl_err_work);
}
}
if (req->nmdesc)
srp_fr_pool_put(target->fr_pool, req->fr_list,
req->nmdesc);
} else {
struct ib_pool_fmr **pfmr;
for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
ib_fmr_pool_unmap(*pfmr);
}
ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd), ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
scmnd->sc_data_direction); scmnd->sc_data_direction);
...@@ -924,21 +1122,19 @@ static int srp_rport_reconnect(struct srp_rport *rport) ...@@ -924,21 +1122,19 @@ static int srp_rport_reconnect(struct srp_rport *rport)
* callbacks will have finished before a new QP is allocated. * callbacks will have finished before a new QP is allocated.
*/ */
ret = srp_new_cm_id(target); ret = srp_new_cm_id(target);
/*
* Whether or not creating a new CM ID succeeded, create a new
* QP. This guarantees that all completion callback function
* invocations have finished before request resetting starts.
*/
if (ret == 0)
ret = srp_create_target_ib(target);
else
srp_create_target_ib(target);
for (i = 0; i < target->req_ring_size; ++i) { for (i = 0; i < target->req_ring_size; ++i) {
struct srp_request *req = &target->req_ring[i]; struct srp_request *req = &target->req_ring[i];
srp_finish_req(target, req, NULL, DID_RESET << 16); srp_finish_req(target, req, NULL, DID_RESET << 16);
} }
/*
* Whether or not creating a new CM ID succeeded, create a new
* QP. This guarantees that all callback functions for the old QP have
* finished before any send requests are posted on the new QP.
*/
ret += srp_create_target_ib(target);
INIT_LIST_HEAD(&target->free_tx); INIT_LIST_HEAD(&target->free_tx);
for (i = 0; i < target->queue_size; ++i) for (i = 0; i < target->queue_size; ++i)
list_add(&target->tx_ring[i]->list, &target->free_tx); list_add(&target->tx_ring[i]->list, &target->free_tx);
...@@ -986,6 +1182,47 @@ static int srp_map_finish_fmr(struct srp_map_state *state, ...@@ -986,6 +1182,47 @@ static int srp_map_finish_fmr(struct srp_map_state *state,
return 0; return 0;
} }
static int srp_map_finish_fr(struct srp_map_state *state,
struct srp_target_port *target)
{
struct srp_device *dev = target->srp_host->srp_dev;
struct ib_send_wr *bad_wr;
struct ib_send_wr wr;
struct srp_fr_desc *desc;
u32 rkey;
desc = srp_fr_pool_get(target->fr_pool);
if (!desc)
return -ENOMEM;
rkey = ib_inc_rkey(desc->mr->rkey);
ib_update_fast_reg_key(desc->mr, rkey);
memcpy(desc->frpl->page_list, state->pages,
sizeof(state->pages[0]) * state->npages);
memset(&wr, 0, sizeof(wr));
wr.opcode = IB_WR_FAST_REG_MR;
wr.wr_id = FAST_REG_WR_ID_MASK;
wr.wr.fast_reg.iova_start = state->base_dma_addr;
wr.wr.fast_reg.page_list = desc->frpl;
wr.wr.fast_reg.page_list_len = state->npages;
wr.wr.fast_reg.page_shift = ilog2(dev->mr_page_size);
wr.wr.fast_reg.length = state->dma_len;
wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE |
IB_ACCESS_REMOTE_READ |
IB_ACCESS_REMOTE_WRITE);
wr.wr.fast_reg.rkey = desc->mr->lkey;
*state->next_fr++ = desc;
state->nmdesc++;
srp_map_desc(state, state->base_dma_addr, state->dma_len,
desc->mr->rkey);
return ib_post_send(target->qp, &wr, &bad_wr);
}
static int srp_finish_mapping(struct srp_map_state *state, static int srp_finish_mapping(struct srp_map_state *state,
struct srp_target_port *target) struct srp_target_port *target)
{ {
...@@ -998,7 +1235,9 @@ static int srp_finish_mapping(struct srp_map_state *state, ...@@ -998,7 +1235,9 @@ static int srp_finish_mapping(struct srp_map_state *state,
srp_map_desc(state, state->base_dma_addr, state->dma_len, srp_map_desc(state, state->base_dma_addr, state->dma_len,
target->rkey); target->rkey);
else else
ret = srp_map_finish_fmr(state, target); ret = target->srp_host->srp_dev->use_fast_reg ?
srp_map_finish_fr(state, target) :
srp_map_finish_fmr(state, target);
if (ret == 0) { if (ret == 0) {
state->npages = 0; state->npages = 0;
...@@ -1020,7 +1259,7 @@ static void srp_map_update_start(struct srp_map_state *state, ...@@ -1020,7 +1259,7 @@ static void srp_map_update_start(struct srp_map_state *state,
static int srp_map_sg_entry(struct srp_map_state *state, static int srp_map_sg_entry(struct srp_map_state *state,
struct srp_target_port *target, struct srp_target_port *target,
struct scatterlist *sg, int sg_index, struct scatterlist *sg, int sg_index,
int use_fmr) bool use_mr)
{ {
struct srp_device *dev = target->srp_host->srp_dev; struct srp_device *dev = target->srp_host->srp_dev;
struct ib_device *ibdev = dev->dev; struct ib_device *ibdev = dev->dev;
...@@ -1032,22 +1271,24 @@ static int srp_map_sg_entry(struct srp_map_state *state, ...@@ -1032,22 +1271,24 @@ static int srp_map_sg_entry(struct srp_map_state *state,
if (!dma_len) if (!dma_len)
return 0; return 0;
if (use_fmr == SRP_MAP_NO_FMR) { if (!use_mr) {
/* Once we're in direct map mode for a request, we don't /*
* go back to FMR mode, so no need to update anything * Once we're in direct map mode for a request, we don't
* go back to FMR or FR mode, so no need to update anything
* other than the descriptor. * other than the descriptor.
*/ */
srp_map_desc(state, dma_addr, dma_len, target->rkey); srp_map_desc(state, dma_addr, dma_len, target->rkey);
return 0; return 0;
} }
/* If we start at an offset into the FMR page, don't merge into /*
* the current FMR. Finish it out, and use the kernel's MR for this * Since not all RDMA HW drivers support non-zero page offsets for
* sg entry. This is to avoid potential bugs on some SRP targets * FMR, if we start at an offset into a page, don't merge into the
* that were never quite defined, but went away when the initiator * current FMR mapping. Finish it out, and use the kernel's MR for
* avoided using FMR on such page fragments. * this sg entry.
*/ */
if (dma_addr & ~dev->mr_page_mask || dma_len > dev->mr_max_size) { if ((!dev->use_fast_reg && dma_addr & ~dev->mr_page_mask) ||
dma_len > dev->mr_max_size) {
ret = srp_finish_mapping(state, target); ret = srp_finish_mapping(state, target);
if (ret) if (ret)
return ret; return ret;
...@@ -1057,16 +1298,18 @@ static int srp_map_sg_entry(struct srp_map_state *state, ...@@ -1057,16 +1298,18 @@ static int srp_map_sg_entry(struct srp_map_state *state,
return 0; return 0;
} }
/* If this is the first sg to go into the FMR, save our position. /*
* We need to know the first unmapped entry, its index, and the * If this is the first sg that will be mapped via FMR or via FR, save
* first unmapped address within that entry to be able to restart * our position. We need to know the first unmapped entry, its index,
* mapping after an error. * and the first unmapped address within that entry to be able to
* restart mapping after an error.
*/ */
if (!state->unmapped_sg) if (!state->unmapped_sg)
srp_map_update_start(state, sg, sg_index, dma_addr); srp_map_update_start(state, sg, sg_index, dma_addr);
while (dma_len) { while (dma_len) {
if (state->npages == dev->max_pages_per_mr) { unsigned offset = dma_addr & ~dev->mr_page_mask;
if (state->npages == dev->max_pages_per_mr || offset != 0) {
ret = srp_finish_mapping(state, target); ret = srp_finish_mapping(state, target);
if (ret) if (ret)
return ret; return ret;
...@@ -1074,17 +1317,18 @@ static int srp_map_sg_entry(struct srp_map_state *state, ...@@ -1074,17 +1317,18 @@ static int srp_map_sg_entry(struct srp_map_state *state,
srp_map_update_start(state, sg, sg_index, dma_addr); srp_map_update_start(state, sg, sg_index, dma_addr);
} }
len = min_t(unsigned int, dma_len, dev->mr_page_size); len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);
if (!state->npages) if (!state->npages)
state->base_dma_addr = dma_addr; state->base_dma_addr = dma_addr;
state->pages[state->npages++] = dma_addr; state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
state->dma_len += len; state->dma_len += len;
dma_addr += len; dma_addr += len;
dma_len -= len; dma_len -= len;
} }
/* If the last entry of the FMR wasn't a full page, then we need to /*
* If the last entry of the MR wasn't a full page, then we need to
* close it out and start a new one -- we can only merge at page * close it out and start a new one -- we can only merge at page
* boundries. * boundries.
*/ */
...@@ -1097,25 +1341,32 @@ static int srp_map_sg_entry(struct srp_map_state *state, ...@@ -1097,25 +1341,32 @@ static int srp_map_sg_entry(struct srp_map_state *state,
return ret; return ret;
} }
static void srp_map_fmr(struct srp_map_state *state, static int srp_map_sg(struct srp_map_state *state,
struct srp_target_port *target, struct srp_request *req, struct srp_target_port *target, struct srp_request *req,
struct scatterlist *scat, int count) struct scatterlist *scat, int count)
{ {
struct srp_device *dev = target->srp_host->srp_dev; struct srp_device *dev = target->srp_host->srp_dev;
struct ib_device *ibdev = dev->dev; struct ib_device *ibdev = dev->dev;
struct scatterlist *sg; struct scatterlist *sg;
int i, use_fmr; int i;
bool use_mr;
state->desc = req->indirect_desc; state->desc = req->indirect_desc;
state->pages = req->map_page; state->pages = req->map_page;
if (dev->use_fast_reg) {
state->next_fr = req->fr_list;
use_mr = !!target->fr_pool;
} else {
state->next_fmr = req->fmr_list; state->next_fmr = req->fmr_list;
use_mr = !!target->fmr_pool;
use_fmr = target->fmr_pool ? SRP_MAP_ALLOW_FMR : SRP_MAP_NO_FMR; }
for_each_sg(scat, sg, count, i) { for_each_sg(scat, sg, count, i) {
if (srp_map_sg_entry(state, target, sg, i, use_fmr)) { if (srp_map_sg_entry(state, target, sg, i, use_mr)) {
/* FMR mapping failed, so backtrack to the first /*
* unmapped entry and continue on without using FMR. * Memory registration failed, so backtrack to the
* first unmapped entry and continue on without using
* memory registration.
*/ */
dma_addr_t dma_addr; dma_addr_t dma_addr;
unsigned int dma_len; unsigned int dma_len;
...@@ -1128,15 +1379,17 @@ static void srp_map_fmr(struct srp_map_state *state, ...@@ -1128,15 +1379,17 @@ static void srp_map_fmr(struct srp_map_state *state,
dma_len = ib_sg_dma_len(ibdev, sg); dma_len = ib_sg_dma_len(ibdev, sg);
dma_len -= (state->unmapped_addr - dma_addr); dma_len -= (state->unmapped_addr - dma_addr);
dma_addr = state->unmapped_addr; dma_addr = state->unmapped_addr;
use_fmr = SRP_MAP_NO_FMR; use_mr = false;
srp_map_desc(state, dma_addr, dma_len, target->rkey); srp_map_desc(state, dma_addr, dma_len, target->rkey);
} }
} }
if (use_fmr == SRP_MAP_ALLOW_FMR && srp_finish_mapping(state, target)) if (use_mr && srp_finish_mapping(state, target))
goto backtrack; goto backtrack;
req->nmdesc = state->nmdesc; req->nmdesc = state->nmdesc;
return 0;
} }
static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target, static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
...@@ -1193,9 +1446,9 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target, ...@@ -1193,9 +1446,9 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
goto map_complete; goto map_complete;
} }
/* We have more than one scatter/gather entry, so build our indirect /*
* descriptor table, trying to merge as many entries with FMR as we * We have more than one scatter/gather entry, so build our indirect
* can. * descriptor table, trying to merge as many entries as we can.
*/ */
indirect_hdr = (void *) cmd->add_data; indirect_hdr = (void *) cmd->add_data;
...@@ -1203,7 +1456,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target, ...@@ -1203,7 +1456,7 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
target->indirect_size, DMA_TO_DEVICE); target->indirect_size, DMA_TO_DEVICE);
memset(&state, 0, sizeof(state)); memset(&state, 0, sizeof(state));
srp_map_fmr(&state, target, req, scat, count); srp_map_sg(&state, target, req, scat, count);
/* We've mapped the request, now pull as much of the indirect /* We've mapped the request, now pull as much of the indirect
* descriptor table as we can into the command buffer. If this * descriptor table as we can into the command buffer. If this
...@@ -1212,7 +1465,8 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target, ...@@ -1212,7 +1465,8 @@ static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_target_port *target,
* give us more S/G entries than we allow. * give us more S/G entries than we allow.
*/ */
if (state.ndesc == 1) { if (state.ndesc == 1) {
/* FMR mapping was able to collapse this to one entry, /*
* Memory registration collapsed the sg-list into one entry,
* so use a direct descriptor. * so use a direct descriptor.
*/ */
struct srp_direct_buf *buf = (void *) cmd->add_data; struct srp_direct_buf *buf = (void *) cmd->add_data;
...@@ -1535,14 +1789,24 @@ static void srp_tl_err_work(struct work_struct *work) ...@@ -1535,14 +1789,24 @@ static void srp_tl_err_work(struct work_struct *work)
srp_start_tl_fail_timers(target->rport); srp_start_tl_fail_timers(target->rport);
} }
static void srp_handle_qp_err(enum ib_wc_status wc_status, bool send_err, static void srp_handle_qp_err(u64 wr_id, enum ib_wc_status wc_status,
struct srp_target_port *target) bool send_err, struct srp_target_port *target)
{ {
if (target->connected && !target->qp_in_error) { if (target->connected && !target->qp_in_error) {
if (wr_id & LOCAL_INV_WR_ID_MASK) {
shost_printk(KERN_ERR, target->scsi_host, PFX
"LOCAL_INV failed with status %d\n",
wc_status);
} else if (wr_id & FAST_REG_WR_ID_MASK) {
shost_printk(KERN_ERR, target->scsi_host, PFX
"FAST_REG_MR failed status %d\n",
wc_status);
} else {
shost_printk(KERN_ERR, target->scsi_host, shost_printk(KERN_ERR, target->scsi_host,
PFX "failed %s status %d\n", PFX "failed %s status %d for iu %p\n",
send_err ? "send" : "receive", send_err ? "send" : "receive",
wc_status); wc_status, (void *)(uintptr_t)wr_id);
}
queue_work(system_long_wq, &target->tl_err_work); queue_work(system_long_wq, &target->tl_err_work);
} }
target->qp_in_error = true; target->qp_in_error = true;
...@@ -1558,7 +1822,7 @@ static void srp_recv_completion(struct ib_cq *cq, void *target_ptr) ...@@ -1558,7 +1822,7 @@ static void srp_recv_completion(struct ib_cq *cq, void *target_ptr)
if (likely(wc.status == IB_WC_SUCCESS)) { if (likely(wc.status == IB_WC_SUCCESS)) {
srp_handle_recv(target, &wc); srp_handle_recv(target, &wc);
} else { } else {
srp_handle_qp_err(wc.status, false, target); srp_handle_qp_err(wc.wr_id, wc.status, false, target);
} }
} }
} }
...@@ -1574,7 +1838,7 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr) ...@@ -1574,7 +1838,7 @@ static void srp_send_completion(struct ib_cq *cq, void *target_ptr)
iu = (struct srp_iu *) (uintptr_t) wc.wr_id; iu = (struct srp_iu *) (uintptr_t) wc.wr_id;
list_add(&iu->list, &target->free_tx); list_add(&iu->list, &target->free_tx);
} else { } else {
srp_handle_qp_err(wc.status, true, target); srp_handle_qp_err(wc.wr_id, wc.status, true, target);
} }
} }
} }
...@@ -2737,9 +3001,9 @@ static ssize_t srp_create_target(struct device *dev, ...@@ -2737,9 +3001,9 @@ static ssize_t srp_create_target(struct device *dev,
goto err; goto err;
} }
if (!srp_dev->has_fmr && !target->allow_ext_sg && if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
target->cmd_sg_cnt < target->sg_tablesize) { target->cmd_sg_cnt < target->sg_tablesize) {
pr_warn("No FMR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n"); pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
target->sg_tablesize = target->cmd_sg_cnt; target->sg_tablesize = target->cmd_sg_cnt;
} }
...@@ -2896,6 +3160,13 @@ static void srp_add_one(struct ib_device *device) ...@@ -2896,6 +3160,13 @@ static void srp_add_one(struct ib_device *device)
srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr && srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
device->map_phys_fmr && device->unmap_fmr); device->map_phys_fmr && device->unmap_fmr);
srp_dev->has_fr = (dev_attr->device_cap_flags &
IB_DEVICE_MEM_MGT_EXTENSIONS);
if (!srp_dev->has_fmr && !srp_dev->has_fr)
dev_warn(&device->dev, "neither FMR nor FR is supported\n");
srp_dev->use_fast_reg = (srp_dev->has_fr &&
(!srp_dev->has_fmr || prefer_fr));
/* /*
* Use the smallest page size supported by the HCA, down to a * Use the smallest page size supported by the HCA, down to a
...@@ -2909,10 +3180,16 @@ static void srp_add_one(struct ib_device *device) ...@@ -2909,10 +3180,16 @@ static void srp_add_one(struct ib_device *device)
do_div(max_pages_per_mr, srp_dev->mr_page_size); do_div(max_pages_per_mr, srp_dev->mr_page_size);
srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR, srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
max_pages_per_mr); max_pages_per_mr);
if (srp_dev->use_fast_reg) {
srp_dev->max_pages_per_mr =
min_t(u32, srp_dev->max_pages_per_mr,
dev_attr->max_fast_reg_page_list_len);
}
srp_dev->mr_max_size = srp_dev->mr_page_size * srp_dev->mr_max_size = srp_dev->mr_page_size *
srp_dev->max_pages_per_mr; srp_dev->max_pages_per_mr;
pr_debug("%s: mr_page_shift = %d, dev_attr->max_mr_size = %#llx, max_pages_per_mr = %d, mr_max_size = %#x\n", pr_debug("%s: mr_page_shift = %d, dev_attr->max_mr_size = %#llx, dev_attr->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
device->name, mr_page_shift, dev_attr->max_mr_size, device->name, mr_page_shift, dev_attr->max_mr_size,
dev_attr->max_fast_reg_page_list_len,
srp_dev->max_pages_per_mr, srp_dev->mr_max_size); srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
INIT_LIST_HEAD(&srp_dev->dev_list); INIT_LIST_HEAD(&srp_dev->dev_list);
......
...@@ -68,8 +68,8 @@ enum { ...@@ -68,8 +68,8 @@ enum {
SRP_MAX_PAGES_PER_MR = 512, SRP_MAX_PAGES_PER_MR = 512,
SRP_MAP_ALLOW_FMR = 0, LOCAL_INV_WR_ID_MASK = 1,
SRP_MAP_NO_FMR = 1, FAST_REG_WR_ID_MASK = 2,
}; };
enum srp_target_state { enum srp_target_state {
...@@ -83,6 +83,12 @@ enum srp_iu_type { ...@@ -83,6 +83,12 @@ enum srp_iu_type {
SRP_IU_RSP, SRP_IU_RSP,
}; };
/*
* @mr_page_mask: HCA memory registration page mask.
* @mr_page_size: HCA memory registration page size.
* @mr_max_size: Maximum size in bytes of a single FMR / FR registration
* request.
*/
struct srp_device { struct srp_device {
struct list_head dev_list; struct list_head dev_list;
struct ib_device *dev; struct ib_device *dev;
...@@ -93,6 +99,8 @@ struct srp_device { ...@@ -93,6 +99,8 @@ struct srp_device {
int mr_max_size; int mr_max_size;
int max_pages_per_mr; int max_pages_per_mr;
bool has_fmr; bool has_fmr;
bool has_fr;
bool use_fast_reg;
}; };
struct srp_host { struct srp_host {
...@@ -110,7 +118,10 @@ struct srp_request { ...@@ -110,7 +118,10 @@ struct srp_request {
struct list_head list; struct list_head list;
struct scsi_cmnd *scmnd; struct scsi_cmnd *scmnd;
struct srp_iu *cmd; struct srp_iu *cmd;
union {
struct ib_pool_fmr **fmr_list; struct ib_pool_fmr **fmr_list;
struct srp_fr_desc **fr_list;
};
u64 *map_page; u64 *map_page;
struct srp_direct_buf *indirect_desc; struct srp_direct_buf *indirect_desc;
dma_addr_t indirect_dma_addr; dma_addr_t indirect_dma_addr;
...@@ -129,7 +140,10 @@ struct srp_target_port { ...@@ -129,7 +140,10 @@ struct srp_target_port {
struct ib_cq *send_cq ____cacheline_aligned_in_smp; struct ib_cq *send_cq ____cacheline_aligned_in_smp;
struct ib_cq *recv_cq; struct ib_cq *recv_cq;
struct ib_qp *qp; struct ib_qp *qp;
union {
struct ib_fmr_pool *fmr_pool; struct ib_fmr_pool *fmr_pool;
struct srp_fr_pool *fr_pool;
};
u32 lkey; u32 lkey;
u32 rkey; u32 rkey;
enum srp_target_state state; enum srp_target_state state;
...@@ -196,8 +210,59 @@ struct srp_iu { ...@@ -196,8 +210,59 @@ struct srp_iu {
enum dma_data_direction direction; enum dma_data_direction direction;
}; };
/**
* struct srp_fr_desc - fast registration work request arguments
* @entry: Entry in srp_fr_pool.free_list.
* @mr: Memory region.
* @frpl: Fast registration page list.
*/
struct srp_fr_desc {
struct list_head entry;
struct ib_mr *mr;
struct ib_fast_reg_page_list *frpl;
};
/**
* struct srp_fr_pool - pool of fast registration descriptors
*
* An entry is available for allocation if and only if it occurs in @free_list.
*
* @size: Number of descriptors in this pool.
* @max_page_list_len: Maximum fast registration work request page list length.
* @lock: Protects free_list.
* @free_list: List of free descriptors.
* @desc: Fast registration descriptor pool.
*/
struct srp_fr_pool {
int size;
int max_page_list_len;
spinlock_t lock;
struct list_head free_list;
struct srp_fr_desc desc[0];
};
/**
* struct srp_map_state - per-request DMA memory mapping state
* @desc: Pointer to the element of the SRP buffer descriptor array
* that is being filled in.
* @pages: Array with DMA addresses of pages being considered for
* memory registration.
* @base_dma_addr: DMA address of the first page that has not yet been mapped.
* @dma_len: Number of bytes that will be registered with the next
* FMR or FR memory registration call.
* @total_len: Total number of bytes in the sg-list being mapped.
* @npages: Number of page addresses in the pages[] array.
* @nmdesc: Number of FMR or FR memory descriptors used for mapping.
* @ndesc: Number of SRP buffer descriptors that have been filled in.
* @unmapped_sg: First element of the sg-list that is mapped via FMR or FR.
* @unmapped_index: Index of the first element mapped via FMR or FR.
* @unmapped_addr: DMA address of the first element mapped via FMR or FR.
*/
struct srp_map_state { struct srp_map_state {
union {
struct ib_pool_fmr **next_fmr; struct ib_pool_fmr **next_fmr;
struct srp_fr_desc **next_fr;
};
struct srp_direct_buf *desc; struct srp_direct_buf *desc;
u64 *pages; u64 *pages;
dma_addr_t base_dma_addr; dma_addr_t base_dma_addr;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册