/*
 *  Device operations for the pnfs nfs4 file layout driver.
 *
 *  Copyright (c) 2002
 *  The Regents of the University of Michigan
 *  All Rights Reserved
 *
 *  Dean Hildebrand <dhildebz@umich.edu>
 *  Garth Goodson   <Garth.Goodson@netapp.com>
 *
 *  Permission is granted to use, copy, create derivative works, and
 *  redistribute this software and such derivative works for any purpose,
 *  so long as the name of the University of Michigan is not used in
 *  any advertising or publicity pertaining to the use or distribution
 *  of this software without specific, written prior authorization. If
 *  the above copyright notice or any other identification of the
 *  University of Michigan is included in any copy of any portion of
 *  this software, then the disclaimer below must also be included.
 *
 *  This software is provided as is, without representation or warranty
 *  of any kind either express or implied, including without limitation
 *  the implied warranties of merchantability, fitness for a particular
 *  purpose, or noninfringement.  The Regents of the University of
 *  Michigan shall not be liable for any damages, including special,
 *  indirect, incidental, or consequential damages, with respect to any
 *  claim arising out of or in connection with the use of the software,
 *  even if it has been or is hereafter advised of the possibility of
 *  such damages.
 */

#include <linux/nfs_fs.h>
#include <linux/vmalloc.h>
#include <linux/module.h>

#include "../internal.h"
#include "../nfs4session.h"
#include "filelayout.h"

#define NFSDBG_FACILITY		NFSDBG_PNFS_LD

/*
 * Data server RPC tunables.  Both are exported as writable module
 * parameters at the end of this file and are handed to
 * nfs4_set_ds_client() whenever a data server connection is set up.
 */
static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO;
static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS;

A
Andy Adamson 已提交
44 45
/*
 * Create an rpc connection to the nfs4_pnfs_ds data server
W
Weston Andros Adamson 已提交
46
 * Currently only supports IPv4 and IPv6 addresses
A
Andy Adamson 已提交
47 48 49 50
 */
static int
nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
{
51
	struct nfs_client *clp = ERR_PTR(-EIO);
52
	struct nfs4_pnfs_ds_addr *da;
A
Andy Adamson 已提交
53 54
	int status = 0;

55
	dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr,
A
Andy Adamson 已提交
56 57
		mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor);

58 59 60
	list_for_each_entry(da, &ds->ds_addrs, da_node) {
		dprintk("%s: DS %s: trying address %s\n",
			__func__, ds->ds_remotestr, da->da_remotestr);
61

62
		clp = nfs4_set_ds_client(mds_srv->nfs_client,
63 64 65
					(struct sockaddr *)&da->da_addr,
					da->da_addrlen, IPPROTO_TCP,
					dataserver_timeo, dataserver_retrans);
66 67 68 69
		if (!IS_ERR(clp))
			break;
	}

A
Andy Adamson 已提交
70 71 72 73 74
	if (IS_ERR(clp)) {
		status = PTR_ERR(clp);
		goto out;
	}

75
	status = nfs4_init_ds_session(clp, mds_srv->nfs_client->cl_lease_time);
A
Andy Adamson 已提交
76 77 78
	if (status)
		goto out_put;

79
	smp_wmb();
A
Andy Adamson 已提交
80
	ds->ds_clp = clp;
W
Weston Andros Adamson 已提交
81
	dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
A
Andy Adamson 已提交
82 83 84 85 86 87 88
out:
	return status;
out_put:
	nfs_put_client(clp);
	goto out;
}

89
void
90 91 92 93 94
nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
{
	struct nfs4_pnfs_ds *ds;
	int i;

95
	nfs4_print_deviceid(&dsaddr->id_node.deviceid);
96 97 98

	for (i = 0; i < dsaddr->ds_num; i++) {
		ds = dsaddr->ds_list[i];
99 100
		if (ds != NULL)
			nfs4_pnfs_ds_put(ds);
101 102 103 104 105 106
	}
	kfree(dsaddr->stripe_indices);
	kfree(dsaddr);
}

/* Decode opaque device data and return the result */
107 108 109
struct nfs4_file_layout_dsaddr *
nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
		gfp_t gfp_flags)
110
{
111
	int i;
112 113
	u32 cnt, num;
	u8 *indexp;
114 115 116 117 118
	__be32 *p;
	u8 *stripe_indices;
	u8 max_stripe_index;
	struct nfs4_file_layout_dsaddr *dsaddr = NULL;
	struct xdr_stream stream;
119
	struct xdr_buf buf;
120
	struct page *scratch;
121 122
	struct list_head dsaddrs;
	struct nfs4_pnfs_ds_addr *da;
123 124

	/* set up xdr stream */
125
	scratch = alloc_page(gfp_flags);
126 127 128
	if (!scratch)
		goto out_err;

129
	xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen);
130
	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
131 132

	/* Get the stripe count (number of stripe index) */
133 134 135 136 137
	p = xdr_inline_decode(&stream, 4);
	if (unlikely(!p))
		goto out_err_free_scratch;

	cnt = be32_to_cpup(p);
138 139
	dprintk("%s stripe count  %d\n", __func__, cnt);
	if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) {
140
		printk(KERN_WARNING "NFS: %s: stripe count %d greater than "
141 142
		       "supported maximum %d\n", __func__,
			cnt, NFS4_PNFS_MAX_STRIPE_CNT);
143 144 145 146
		goto out_err_free_scratch;
	}

	/* read stripe indices */
147
	stripe_indices = kcalloc(cnt, sizeof(u8), gfp_flags);
148 149 150 151 152 153 154 155 156 157 158 159 160
	if (!stripe_indices)
		goto out_err_free_scratch;

	p = xdr_inline_decode(&stream, cnt << 2);
	if (unlikely(!p))
		goto out_err_free_stripe_indices;

	indexp = &stripe_indices[0];
	max_stripe_index = 0;
	for (i = 0; i < cnt; i++) {
		*indexp = be32_to_cpup(p++);
		max_stripe_index = max(max_stripe_index, *indexp);
		indexp++;
161 162 163
	}

	/* Check the multipath list count */
164 165 166 167 168
	p = xdr_inline_decode(&stream, 4);
	if (unlikely(!p))
		goto out_err_free_stripe_indices;

	num = be32_to_cpup(p);
169 170
	dprintk("%s ds_num %u\n", __func__, num);
	if (num > NFS4_PNFS_MAX_MULTI_CNT) {
171
		printk(KERN_WARNING "NFS: %s: multipath count %d greater than "
172 173
			"supported maximum %d\n", __func__,
			num, NFS4_PNFS_MAX_MULTI_CNT);
174
		goto out_err_free_stripe_indices;
175
	}
176 177 178

	/* validate stripe indices are all < num */
	if (max_stripe_index >= num) {
179
		printk(KERN_WARNING "NFS: %s: stripe index %u >= num ds %u\n",
180 181 182 183
			__func__, max_stripe_index, num);
		goto out_err_free_stripe_indices;
	}

184 185
	dsaddr = kzalloc(sizeof(*dsaddr) +
			(sizeof(struct nfs4_pnfs_ds *) * (num - 1)),
186
			gfp_flags);
187
	if (!dsaddr)
188
		goto out_err_free_stripe_indices;
189 190

	dsaddr->stripe_count = cnt;
191 192
	dsaddr->stripe_indices = stripe_indices;
	stripe_indices = NULL;
193
	dsaddr->ds_num = num;
194
	nfs4_init_deviceid_node(&dsaddr->id_node, server, &pdev->dev_id);
195

196 197
	INIT_LIST_HEAD(&dsaddrs);

198 199
	for (i = 0; i < dsaddr->ds_num; i++) {
		int j;
200 201 202 203 204
		u32 mp_count;

		p = xdr_inline_decode(&stream, 4);
		if (unlikely(!p))
			goto out_err_free_deviceid;
205

206 207
		mp_count = be32_to_cpup(p); /* multipath count */
		for (j = 0; j < mp_count; j++) {
208 209
			da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net,
						    &stream, gfp_flags);
210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230
			if (da)
				list_add_tail(&da->da_node, &dsaddrs);
		}
		if (list_empty(&dsaddrs)) {
			dprintk("%s: no suitable DS addresses found\n",
				__func__);
			goto out_err_free_deviceid;
		}

		dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags);
		if (!dsaddr->ds_list[i])
			goto out_err_drain_dsaddrs;

		/* If DS was already in cache, free ds addrs */
		while (!list_empty(&dsaddrs)) {
			da = list_first_entry(&dsaddrs,
					      struct nfs4_pnfs_ds_addr,
					      da_node);
			list_del_init(&da->da_node);
			kfree(da->da_remotestr);
			kfree(da);
231 232
		}
	}
233 234

	__free_page(scratch);
235 236
	return dsaddr;

237 238 239 240 241 242 243 244
out_err_drain_dsaddrs:
	while (!list_empty(&dsaddrs)) {
		da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr,
				      da_node);
		list_del_init(&da->da_node);
		kfree(da->da_remotestr);
		kfree(da);
	}
245
out_err_free_deviceid:
246
	nfs4_fl_free_deviceid(dsaddr);
247 248 249 250 251 252
	/* stripe_indicies was part of dsaddr */
	goto out_err_free_scratch;
out_err_free_stripe_indices:
	kfree(stripe_indices);
out_err_free_scratch:
	__free_page(scratch);
253 254 255 256 257
out_err:
	dprintk("%s ERROR: returning NULL\n", __func__);
	return NULL;
}

258 259
void
nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
260
{
261
	nfs4_put_deviceid_node(&dsaddr->id_node);
262
}
F
Fred Isaman 已提交
263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304

/*
 * Map a file offset onto a slot of the device's stripe index table:
 *
 *   stripe number = (offset - pattern_offset) / stripe_unit
 *   j             = (stripe number + first_stripe_index) % stripe_count
 *
 * Returns j.
 */
u32
nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset)
{
	struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
	u64 stripe_no = offset - flseg->pattern_offset;
	u32 j;

	/* do_div() leaves the quotient in its first argument ... */
	do_div(stripe_no, flseg->stripe_unit);
	stripe_no += flseg->first_stripe_index;

	/* ... and hands back the remainder, which is the slot we want */
	j = do_div(stripe_no, flseg->dsaddr->stripe_count);
	return j;
}

/*
 * Look up entry @j of the device's stripe index table, i.e. the data
 * server index used for that stripe slot.  @j is not range-checked here.
 */
u32
nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j)
{
	struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);

	return flseg->dsaddr->stripe_indices[j];
}

/*
 * Pick the filehandle to use for stripe slot @j.
 *
 * For any stripe type other than STRIPE_SPARSE the handle array is
 * indexed by @j directly.  For STRIPE_SPARSE layouts:
 *   - no handles:  return NULL so the caller falls back to the MDS fh
 *   - one handle:  it is used for every slot
 *   - otherwise:   the array is indexed by the data server index of @j
 */
struct nfs_fh *
nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
{
	struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);

	if (flseg->stripe_type != STRIPE_SPARSE)
		return flseg->fh_array[j];

	if (flseg->num_fh == 1)
		return flseg->fh_array[0];

	/* Use the MDS OPEN fh set in nfs_read_rpcsetup */
	if (flseg->num_fh == 0)
		return NULL;

	return flseg->fh_array[nfs4_fl_calc_ds_index(lseg, j)];
}

305 306 307
static void nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds)
{
	might_sleep();
308 309
	wait_on_bit_action(&ds->ds_state, NFS4DS_CONNECTING,
			   nfs_wait_bit_killable, TASK_KILLABLE);
310 311 312 313
}

static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds)
{
314
	smp_mb__before_atomic();
315
	clear_bit(NFS4DS_CONNECTING, &ds->ds_state);
316
	smp_mb__after_atomic();
317 318 319 320
	wake_up_bit(&ds->ds_state, NFS4DS_CONNECTING);
}


F
Fred Isaman 已提交
321 322 323 324 325
struct nfs4_pnfs_ds *
nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
{
	struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr;
	struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx];
326
	struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
327
	struct nfs4_pnfs_ds *ret = ds;
F
Fred Isaman 已提交
328 329

	if (ds == NULL) {
330
		printk(KERN_ERR "NFS: %s: No data server for offset index %d\n",
F
Fred Isaman 已提交
331
			__func__, ds_idx);
332
		pnfs_generic_mark_devid_invalid(devid);
333
		goto out;
F
Fred Isaman 已提交
334
	}
335
	smp_rmb();
336
	if (ds->ds_clp)
337
		goto out_test_devid;
F
Fred Isaman 已提交
338

339
	if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) {
340
		struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);
F
Fred Isaman 已提交
341 342
		int err;

343
		err = nfs4_ds_connect(s, ds);
344
		if (err)
345
			nfs4_mark_deviceid_unavailable(devid);
346 347 348 349
		nfs4_clear_ds_conn_bit(ds);
	} else {
		/* Either ds is connected, or ds is NULL */
		nfs4_wait_ds_connect(ds);
F
Fred Isaman 已提交
350
	}
351 352 353 354 355
out_test_devid:
	if (filelayout_test_devid_unavailable(devid))
		ret = NULL;
out:
	return ret;
F
Fred Isaman 已提交
356
}

/*
 * Expose the data server RPC tunables as module parameters.  Mode 0644
 * makes them readable by everyone and writable by root.
 */
module_param(dataserver_retrans, uint, 0644);
MODULE_PARM_DESC(dataserver_retrans, "The  number of times the NFSv4.1 client "
			"retries a request before it attempts further "
			" recovery  action.");
module_param(dataserver_timeo, uint, 0644);
MODULE_PARM_DESC(dataserver_timeo, "The time (in tenths of a second) the "
			"NFSv4.1  client  waits for a response from a "
			" data server before it retries an NFS request.");