/*
 * Copyright (c) 2015 Oracle.  All rights reserved.
 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
 */

/* Lightweight memory registration using Fast Memory Regions (FMR).
 * Sometimes referred to as MTHCAFMR mode.
 *
 * FMR uses synchronous memory registration and deregistration.
 * FMR registration is known to be fast, but FMR deregistration
 * can take tens of usecs to complete.
 */

/* Normal operation
 *
 * A Memory Region is prepared for RDMA READ or WRITE using the
 * ib_map_phys_fmr verb (fmr_op_map). When the RDMA operation is
 * finished, the Memory Region is unmapped using the ib_unmap_fmr
 * verb (fmr_op_unmap).
 */
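
/* A minimal sketch of that sequence, assuming the generic xprtrdma
 * code dispatches registration through ia->ri_ops (illustrative
 * only, not a fixed calling convention):
 *
 *	nsegs = ia->ri_ops->ro_map(r_xprt, seg, nsegs, writing);
 *	... post RDMA READ or WRITE using seg->mr_rkey ...
 *	ia->ri_ops->ro_unmap(r_xprt, seg);
 */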

/* Transport recovery
 *
 * After a transport reconnect, fmr_op_map re-uses the MR already
 * allocated for the RPC, but generates a fresh rkey and then maps
 * the MR again. This process is synchronous.
 */

#include "xprt_rdma.h"

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY	RPCDBG_TRANS
#endif

/* Maximum scatter/gather per FMR */
#define RPCRDMA_MAX_FMR_SGES	(64)

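/* Per-transport setup: FMR mode has no connection parameters to
 * adjust, so this is a no-op.
 */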
static int
fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
	    struct rpcrdma_create_data_internal *cdata)
{
	return 0;
}

/* FMR mode conveys up to 64 pages of payload per chunk segment.
 */
static size_t
fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
{
	return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
		     RPCRDMA_MAX_HDR_SEGS * RPCRDMA_MAX_FMR_SGES);
}

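/* Create a pool of FMRs large enough to register the maximum
 * payload of every RPC slot, plus one MW each for the head and
 * tail chunks of each request.
 */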
static int
fmr_op_init(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ;
	struct ib_fmr_attr fmr_attr = {
		.max_pages	= RPCRDMA_MAX_FMR_SGES,
		.max_maps	= 1,
		.page_shift	= PAGE_SHIFT
	};
	struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
	struct rpcrdma_mw *r;
	int i, rc;

	spin_lock_init(&buf->rb_mwlock);
	INIT_LIST_HEAD(&buf->rb_mws);
	INIT_LIST_HEAD(&buf->rb_all);

	i = max_t(int, RPCRDMA_MAX_DATA_SEGS / RPCRDMA_MAX_FMR_SGES, 1);
	i += 2;				/* head + tail */
	i *= buf->rb_max_requests;	/* one set for each RPC slot */
	dprintk("RPC:       %s: initializing %d FMRs\n", __func__, i);

	rc = -ENOMEM;
	while (i--) {
		r = kzalloc(sizeof(*r), GFP_KERNEL);
		if (!r)
			goto out;

		r->fmr.physaddrs = kmalloc(RPCRDMA_MAX_FMR_SGES *
					   sizeof(u64), GFP_KERNEL);
		if (!r->fmr.physaddrs)
			goto out_free;

		r->fmr.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr);
		if (IS_ERR(r->fmr.fmr))
			goto out_fmr_err;

		list_add(&r->mw_list, &buf->rb_mws);
		list_add(&r->mw_all, &buf->rb_all);
	}
	return 0;

out_fmr_err:
	rc = PTR_ERR(r->fmr.fmr);
	dprintk("RPC:       %s: ib_alloc_fmr status %i\n", __func__, rc);
	kfree(r->fmr.physaddrs);
out_free:
	kfree(r);
out:
	return rc;
}

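/* ib_unmap_fmr() operates on a list of FMRs. Build a one-entry
 * list so a single FMR can be invalidated synchronously.
 */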
static int
__fmr_unmap(struct rpcrdma_mw *r)
{
	LIST_HEAD(l);

	list_add(&r->fmr.fmr->list, &l);
	return ib_unmap_fmr(&l);
}

/* Use the ib_map_phys_fmr() verb to register a memory region
 * for remote access via RDMA READ or RDMA WRITE.
 */
static int
fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
	   int nsegs, bool writing)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	struct ib_device *device = ia->ri_device;
	enum dma_data_direction direction = rpcrdma_data_dir(writing);
	struct rpcrdma_mr_seg *seg1 = seg;
	int len, pageoff, i, rc;
	struct rpcrdma_mw *mw;

	mw = seg1->rl_mw;
	seg1->rl_mw = NULL;
	if (!mw) {
		mw = rpcrdma_get_mw(r_xprt);
		if (!mw)
			return -ENOMEM;
	} else {
		/* this is a retransmit; generate a fresh rkey */
		rc = __fmr_unmap(mw);
		if (rc)
			return rc;
	}

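	/* An FMR maps whole pages, so back the first segment up to a
	 * page boundary; the byte offset is added back into mr_base
	 * once the mapping succeeds.
	 */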
	pageoff = offset_in_page(seg1->mr_offset);
	seg1->mr_offset -= pageoff;	/* start of page */
	seg1->mr_len += pageoff;
	len = -pageoff;
	if (nsegs > RPCRDMA_MAX_FMR_SGES)
		nsegs = RPCRDMA_MAX_FMR_SGES;
	for (i = 0; i < nsegs;) {
		rpcrdma_map_one(device, seg, direction);
		mw->fmr.physaddrs[i] = seg->mr_dma;
		len += seg->mr_len;
		++seg;
		++i;
		/* Check for holes */
		if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
			break;
	}

	rc = ib_map_phys_fmr(mw->fmr.fmr, mw->fmr.physaddrs,
			     i, seg1->mr_dma);
	if (rc)
		goto out_maperr;

	seg1->rl_mw = mw;
	seg1->mr_rkey = mw->fmr.fmr->rkey;
	seg1->mr_base = seg1->mr_dma + pageoff;
	seg1->mr_nsegs = i;
	seg1->mr_len = len;
	return i;

out_maperr:
	dprintk("RPC:       %s: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
		__func__, len, (unsigned long long)seg1->mr_dma,
		pageoff, i, rc);
	while (i--)
		rpcrdma_unmap_one(device, --seg);
	return rc;
}

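/* DMA unmap each page of a registered segment, then return its
 * MW to the free list.
 */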
static void
__fmr_dma_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
{
	struct ib_device *device = r_xprt->rx_ia.ri_device;
	struct rpcrdma_mw *mw = seg->rl_mw;
	int nsegs = seg->mr_nsegs;

	seg->rl_mw = NULL;

	while (nsegs--)
		rpcrdma_unmap_one(device, seg++);

	rpcrdma_put_mw(r_xprt, mw);
}

/* Invalidate all memory regions that were registered for "req".
 *
 * Sleeps until it is safe for the host CPU to access the
 * previously mapped memory regions.
 */
static void
fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
	struct rpcrdma_mr_seg *seg;
	unsigned int i, nchunks;
	struct rpcrdma_mw *mw;
	LIST_HEAD(unmap_list);
	int rc;

	dprintk("RPC:       %s: req %p\n", __func__, req);

	/* ORDER: Invalidate all of the req's MRs first
	 *
	 * ib_unmap_fmr() is slow, so use a single call instead
	 * of one call per mapped MR.
	 */
	for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
		seg = &req->rl_segments[i];
		mw = seg->rl_mw;

		list_add(&mw->fmr.fmr->list, &unmap_list);

		i += seg->mr_nsegs;
	}
	rc = ib_unmap_fmr(&unmap_list);
	if (rc)
		pr_warn("%s: ib_unmap_fmr failed (%i)\n", __func__, rc);

	/* ORDER: Now DMA unmap all of the req's MRs, and return
	 * them to the free MW list.
	 */
	for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
		seg = &req->rl_segments[i];

		__fmr_dma_unmap(r_xprt, seg);

		i += seg->mr_nsegs;
		seg->mr_nsegs = 0;
	}

	req->rl_nchunks = 0;
}

/* Use the ib_unmap_fmr() verb to prevent further remote
 * access via RDMA READ or RDMA WRITE.
 */
static int
fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	struct rpcrdma_mr_seg *seg1 = seg;
	struct rpcrdma_mw *mw = seg1->rl_mw;
	int rc, nsegs = seg->mr_nsegs;

	dprintk("RPC:       %s: FMR %p\n", __func__, mw);

	seg1->rl_mw = NULL;
	while (seg1->mr_nsegs--)
		rpcrdma_unmap_one(ia->ri_device, seg++);
	rc = __fmr_unmap(mw);
	if (rc)
		goto out_err;
	rpcrdma_put_mw(r_xprt, mw);
	return nsegs;

out_err:
	/* The FMR is abandoned, but remains in rb_all. fmr_op_destroy
	 * will attempt to release it when the transport is destroyed.
	 */
	dprintk("RPC:       %s: ib_unmap_fmr status %i\n", __func__, rc);
	return nsegs;
}

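/* Free every FMR, its physaddrs array, and its rpcrdma_mw as the
 * transport is torn down.
 */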
static void
fmr_op_destroy(struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mw *r;
	int rc;

	while (!list_empty(&buf->rb_all)) {
		r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
		list_del(&r->mw_all);
		kfree(r->fmr.physaddrs);

		rc = ib_dealloc_fmr(r->fmr.fmr);
		if (rc)
			dprintk("RPC:       %s: ib_dealloc_fmr failed %i\n",
				__func__, rc);

		kfree(r);
	}
}

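/* The registration strategy vector for MTHCAFMR mode, plugged
 * into the generic xprtrdma code.
 */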
const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
	.ro_map				= fmr_op_map,
	.ro_unmap_sync			= fmr_op_unmap_sync,
	.ro_unmap			= fmr_op_unmap,
	.ro_open			= fmr_op_open,
	.ro_maxpages			= fmr_op_maxpages,
	.ro_init			= fmr_op_init,
	.ro_destroy			= fmr_op_destroy,
	.ro_displayname			= "fmr",
};