/*
 * Copyright (c) 2015 Oracle.  All rights reserved.
 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
 */

/* Lightweight memory registration using Fast Memory Regions (FMR).
 * Referred to sometimes as MTHCAFMR mode.
 *
 * FMR uses synchronous memory registration and deregistration.
 * FMR registration is known to be fast, but FMR deregistration
 * can take tens of usecs to complete.
 */

/* Normal operation
 *
 * A Memory Region is prepared for RDMA READ or WRITE using the
 * ib_map_phys_fmr verb (fmr_op_map). When the RDMA operation is
 * finished, the Memory Region is unmapped using the ib_unmap_fmr
 * verb (fmr_op_unmap).
 */
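
/* A minimal sketch of that verb sequence, assuming "fmr" is an
 * already-allocated struct ib_fmr and "physaddrs" holds the DMA
 * address of each page being exposed (names are illustrative only):
 *
 *	rc = ib_map_phys_fmr(fmr, physaddrs, npages, iova);
 *	 ... remote peer performs RDMA READ/WRITE via fmr->rkey ...
 *	LIST_HEAD(list);
 *	list_add(&fmr->list, &list);
 *	rc = ib_unmap_fmr(&list);
 */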

/* Transport recovery
 *
 * After a transport reconnect, fmr_op_map re-uses the MR already
 * allocated for the RPC, but generates a fresh rkey then maps the
 * MR again. This process is synchronous.
 */
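
/* In outline, the retransmit arm of fmr_op_map below does:
 *
 *	rc = __fmr_unmap(mw);		(invalidate the old mapping)
 *	 ...
 *	rc = ib_map_phys_fmr(...);	(map again; assigns a fresh rkey)
 *	seg1->mr_rkey = mw->fmr.fmr->rkey;
 */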

#include "xprt_rdma.h"

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY	RPCDBG_TRANS
#endif

/* Maximum scatter/gather per FMR */
#define RPCRDMA_MAX_FMR_SGES	(64)

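/* FMR requires no device-specific connection setup. All that is
 * needed here is to size the RPC-over-RDMA headers: each chunk
 * segment can carry at most RPCRDMA_MAX_FMR_SGES pages.
 */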
static int
fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
	    struct rpcrdma_create_data_internal *cdata)
{
	rpcrdma_set_max_header_sizes(ia, cdata, max_t(unsigned int, 1,
						      RPCRDMA_MAX_DATA_SEGS /
						      RPCRDMA_MAX_FMR_SGES));
	return 0;
}

/* FMR mode conveys up to 64 pages of payload per chunk segment
 * (256KB per segment with 4KB pages). The transport's maximum
 * payload is the smaller of RPCRDMA_MAX_DATA_SEGS pages and the
 * combined capacity of RPCRDMA_MAX_HDR_SEGS such segments.
 */
static size_t
fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
{
	return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
		     RPCRDMA_MAX_HDR_SEGS * RPCRDMA_MAX_FMR_SGES);
}

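/* Allocate a pool of FMRs sized for the worst case: enough MWs to
 * register the payload of every RPC slot, plus two per slot for
 * the head and tail iovecs.
 */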
static int
fmr_op_init(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ;
	struct ib_fmr_attr fmr_attr = {
		.max_pages	= RPCRDMA_MAX_FMR_SGES,
		.max_maps	= 1,
		.page_shift	= PAGE_SHIFT
	};
	struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
	struct rpcrdma_mw *r;
	int i, rc;

	spin_lock_init(&buf->rb_mwlock);
	INIT_LIST_HEAD(&buf->rb_mws);
	INIT_LIST_HEAD(&buf->rb_all);

	i = max_t(int, RPCRDMA_MAX_DATA_SEGS / RPCRDMA_MAX_FMR_SGES, 1);
	i += 2;				/* head + tail */
	i *= buf->rb_max_requests;	/* one set for each RPC slot */
	dprintk("RPC:       %s: initalizing %d FMRs\n", __func__, i);

	rc = -ENOMEM;
C
Chuck Lever 已提交
	while (i--) {
		r = kzalloc(sizeof(*r), GFP_KERNEL);
		if (!r)
			goto out;

		r->fmr.physaddrs = kmalloc(RPCRDMA_MAX_FMR_SGES *
					   sizeof(u64), GFP_KERNEL);
		if (!r->fmr.physaddrs)
			goto out_free;

		r->fmr.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr);
		if (IS_ERR(r->fmr.fmr))
			goto out_fmr_err;

		list_add(&r->mw_list, &buf->rb_mws);
		list_add(&r->mw_all, &buf->rb_all);
	}
	return 0;

out_fmr_err:
	rc = PTR_ERR(r->fmr.fmr);
	dprintk("RPC:       %s: ib_alloc_fmr status %i\n", __func__, rc);
	kfree(r->fmr.physaddrs);
out_free:
	kfree(r);
out:
	return rc;
}

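/* The ib_unmap_fmr() verb operates on a list of FMRs. Wrap a
 * single FMR in a one-entry list so it can be invalidated on
 * its own.
 */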
static int
__fmr_unmap(struct rpcrdma_mw *r)
{
	LIST_HEAD(l);

	list_add(&r->fmr.fmr->list, &l);
	return ib_unmap_fmr(&l);
}

/* Use the ib_map_phys_fmr() verb to register a memory region
 * for remote access via RDMA READ or RDMA WRITE.
 */
static int
fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
	   int nsegs, bool writing)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	struct ib_device *device = ia->ri_device;
	enum dma_data_direction direction = rpcrdma_data_dir(writing);
	struct rpcrdma_mr_seg *seg1 = seg;
	int len, pageoff, i, rc;
	struct rpcrdma_mw *mw;

	mw = seg1->rl_mw;
	seg1->rl_mw = NULL;
	if (!mw) {
		mw = rpcrdma_get_mw(r_xprt);
		if (!mw)
			return -ENOMEM;
	} else {
		/* this is a retransmit; generate a fresh rkey */
		rc = __fmr_unmap(mw);
		if (rc)
			return rc;
	}

	/* FMRs are built from whole pages: align the mapping down
	 * to a page boundary and track the resulting byte offset.
	 */
	pageoff = offset_in_page(seg1->mr_offset);
	seg1->mr_offset -= pageoff;	/* start of page */
	seg1->mr_len += pageoff;
	len = -pageoff;
	if (nsegs > RPCRDMA_MAX_FMR_SGES)
		nsegs = RPCRDMA_MAX_FMR_SGES;
	for (i = 0; i < nsegs;) {
		rpcrdma_map_one(device, seg, direction);
		mw->fmr.physaddrs[i] = seg->mr_dma;
		len += seg->mr_len;
		++seg;
		++i;
		/* Check for holes: an FMR maps one contiguous range,
		 * so stop coalescing at the first unaligned boundary.
		 */
		if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
			break;
	}

	rc = ib_map_phys_fmr(mw->fmr.fmr, mw->fmr.physaddrs,
			     i, seg1->mr_dma);
	if (rc)
		goto out_maperr;

	seg1->rl_mw = mw;
	seg1->mr_rkey = mw->fmr.fmr->rkey;
	seg1->mr_base = seg1->mr_dma + pageoff;
	seg1->mr_nsegs = i;
	seg1->mr_len = len;
	return i;

out_maperr:
	dprintk("RPC:       %s: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
		__func__, len, (unsigned long long)seg1->mr_dma,
		pageoff, i, rc);
	while (i--)
		rpcrdma_unmap_one(device, --seg);
	return rc;
}

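/* DMA unmap every segment of one chunk and release its MW back
 * to the free list. The caller has already invalidated the FMR,
 * so the device no longer references this memory.
 */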
static void
__fmr_dma_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
{
	struct ib_device *device = r_xprt->rx_ia.ri_device;
	struct rpcrdma_mw *mw = seg->rl_mw;
	int nsegs = seg->mr_nsegs;

	seg->rl_mw = NULL;

	while (nsegs--)
		rpcrdma_unmap_one(device, seg++);

	rpcrdma_put_mw(r_xprt, mw);
}

/* Invalidate all memory regions that were registered for "req".
 *
 * Sleeps until it is safe for the host CPU to access the
 * previously mapped memory regions.
 */
static void
fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
	struct rpcrdma_mr_seg *seg;
	unsigned int i, nchunks;
	struct rpcrdma_mw *mw;
	LIST_HEAD(unmap_list);
	int rc;

	dprintk("RPC:       %s: req %p\n", __func__, req);

	/* ORDER: Invalidate all of the req's MRs first
	 *
	 * ib_unmap_fmr() is slow, so use a single call instead
	 * of one call per mapped MR.
	 */
	for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
		seg = &req->rl_segments[i];
		mw = seg->rl_mw;

		list_add(&mw->fmr.fmr->list, &unmap_list);

		i += seg->mr_nsegs;
	}
	rc = ib_unmap_fmr(&unmap_list);
	if (rc)
		pr_warn("%s: ib_unmap_fmr failed (%i)\n", __func__, rc);

	/* ORDER: Now DMA unmap all of the req's MRs, and return
	 * them to the free MW list.
	 */
	for (i = 0, nchunks = req->rl_nchunks; nchunks; nchunks--) {
		seg = &req->rl_segments[i];

		__fmr_dma_unmap(r_xprt, seg);

		i += seg->mr_nsegs;
		seg->mr_nsegs = 0;
	}

	req->rl_nchunks = 0;
}

/* Use the ib_unmap_fmr() verb to prevent further remote
 * access via RDMA READ or RDMA WRITE.
 */
static int
fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
{
	struct rpcrdma_ia *ia = &r_xprt->rx_ia;
	struct rpcrdma_mr_seg *seg1 = seg;
	struct rpcrdma_mw *mw = seg1->rl_mw;
	int rc, nsegs = seg->mr_nsegs;

	dprintk("RPC:       %s: FMR %p\n", __func__, mw);

	seg1->rl_mw = NULL;
	while (seg1->mr_nsegs--)
		rpcrdma_unmap_one(ia->ri_device, seg++);
	rc = __fmr_unmap(mw);
	if (rc)
		goto out_err;
	rpcrdma_put_mw(r_xprt, mw);
	return nsegs;

out_err:
	/* The FMR is abandoned, but remains in rb_all. fmr_op_destroy
	 * will attempt to release it when the transport is destroyed.
	 */
	dprintk("RPC:       %s: ib_unmap_fmr status %i\n", __func__, rc);
	return nsegs;
}

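/* Free every MW on the rb_all list when the transport is torn
 * down, including any FMR previously abandoned by fmr_op_unmap
 * after a failed ib_unmap_fmr call.
 */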
static void
fmr_op_destroy(struct rpcrdma_buffer *buf)
{
	struct rpcrdma_mw *r;
	int rc;

	while (!list_empty(&buf->rb_all)) {
		r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
		list_del(&r->mw_all);
		kfree(r->fmr.physaddrs);

		rc = ib_dealloc_fmr(r->fmr.fmr);
		if (rc)
			dprintk("RPC:       %s: ib_dealloc_fmr failed %i\n",
				__func__, rc);

		kfree(r);
	}
}

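/* Handlers invoked by the generic xprtrdma code to register and
 * invalidate memory in FMR mode.
 */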
const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
	.ro_map				= fmr_op_map,
	.ro_unmap_sync			= fmr_op_unmap_sync,
	.ro_unmap			= fmr_op_unmap,
	.ro_open			= fmr_op_open,
	.ro_maxpages			= fmr_op_maxpages,
	.ro_init			= fmr_op_init,
	.ro_destroy			= fmr_op_destroy,
	.ro_displayname			= "fmr",
};