filelayoutdev.c 8.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
/*
 *  Device operations for the pnfs nfs4 file layout driver.
 *
 *  Copyright (c) 2002
 *  The Regents of the University of Michigan
 *  All Rights Reserved
 *
 *  Dean Hildebrand <dhildebz@umich.edu>
 *  Garth Goodson   <Garth.Goodson@netapp.com>
 *
 *  Permission is granted to use, copy, create derivative works, and
 *  redistribute this software and such derivative works for any purpose,
 *  so long as the name of the University of Michigan is not used in
 *  any advertising or publicity pertaining to the use or distribution
 *  of this software without specific, written prior authorization. If
 *  the above copyright notice or any other identification of the
 *  University of Michigan is included in any copy of any portion of
 *  this software, then the disclaimer below must also be included.
 *
 *  This software is provided as is, without representation or warranty
 *  of any kind either express or implied, including without limitation
 *  the implied warranties of merchantability, fitness for a particular
 *  purpose, or noninfringement.  The Regents of the University of
 *  Michigan shall not be liable for any damages, including special,
 *  indirect, incidental, or consequential damages, with respect to any
 *  claim arising out of or in connection with the use of the software,
 *  even if it has been or is hereafter advised of the possibility of
 *  such damages.
 */

#include <linux/nfs_fs.h>
#include <linux/vmalloc.h>
33
#include <linux/module.h>
34

35 36 37
#include "../internal.h"
#include "../nfs4session.h"
#include "filelayout.h"
38 39 40

#define NFSDBG_FACILITY		NFSDBG_PNFS_LD

41 42 43
/*
 * Data server RPC tunables.  Both are exposed through module_param() later
 * in this file and are passed to nfs4_pnfs_ds_connect() by
 * nfs4_fl_prepare_ds().
 */
/* Time (in tenths of a second) to wait for a data server reply before retrying */
static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO;
/* Number of times a request is retried before further recovery action */
static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS;

44
void
45 46 47 48 49
nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
{
	struct nfs4_pnfs_ds *ds;
	int i;

50
	nfs4_print_deviceid(&dsaddr->id_node.deviceid);
51 52 53

	for (i = 0; i < dsaddr->ds_num; i++) {
		ds = dsaddr->ds_list[i];
54 55
		if (ds != NULL)
			nfs4_pnfs_ds_put(ds);
56 57
	}
	kfree(dsaddr->stripe_indices);
58
	kfree_rcu(dsaddr, id_node.rcu);
59 60 61
}

/* Decode opaque device data and return the result */
62 63 64
struct nfs4_file_layout_dsaddr *
nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
		gfp_t gfp_flags)
65
{
66
	int i;
67 68
	u32 cnt, num;
	u8 *indexp;
69 70 71 72 73
	__be32 *p;
	u8 *stripe_indices;
	u8 max_stripe_index;
	struct nfs4_file_layout_dsaddr *dsaddr = NULL;
	struct xdr_stream stream;
74
	struct xdr_buf buf;
75
	struct page *scratch;
76 77
	struct list_head dsaddrs;
	struct nfs4_pnfs_ds_addr *da;
78 79

	/* set up xdr stream */
80
	scratch = alloc_page(gfp_flags);
81 82 83
	if (!scratch)
		goto out_err;

84
	xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen);
85
	xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
86 87

	/* Get the stripe count (number of stripe index) */
88 89 90 91 92
	p = xdr_inline_decode(&stream, 4);
	if (unlikely(!p))
		goto out_err_free_scratch;

	cnt = be32_to_cpup(p);
93 94
	dprintk("%s stripe count  %d\n", __func__, cnt);
	if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) {
95
		printk(KERN_WARNING "NFS: %s: stripe count %d greater than "
96 97
		       "supported maximum %d\n", __func__,
			cnt, NFS4_PNFS_MAX_STRIPE_CNT);
98 99 100 101
		goto out_err_free_scratch;
	}

	/* read stripe indices */
102
	stripe_indices = kcalloc(cnt, sizeof(u8), gfp_flags);
103 104 105 106 107 108 109 110 111 112 113 114 115
	if (!stripe_indices)
		goto out_err_free_scratch;

	p = xdr_inline_decode(&stream, cnt << 2);
	if (unlikely(!p))
		goto out_err_free_stripe_indices;

	indexp = &stripe_indices[0];
	max_stripe_index = 0;
	for (i = 0; i < cnt; i++) {
		*indexp = be32_to_cpup(p++);
		max_stripe_index = max(max_stripe_index, *indexp);
		indexp++;
116 117 118
	}

	/* Check the multipath list count */
119 120 121 122 123
	p = xdr_inline_decode(&stream, 4);
	if (unlikely(!p))
		goto out_err_free_stripe_indices;

	num = be32_to_cpup(p);
124 125
	dprintk("%s ds_num %u\n", __func__, num);
	if (num > NFS4_PNFS_MAX_MULTI_CNT) {
126
		printk(KERN_WARNING "NFS: %s: multipath count %d greater than "
127 128
			"supported maximum %d\n", __func__,
			num, NFS4_PNFS_MAX_MULTI_CNT);
129
		goto out_err_free_stripe_indices;
130
	}
131 132 133

	/* validate stripe indices are all < num */
	if (max_stripe_index >= num) {
134
		printk(KERN_WARNING "NFS: %s: stripe index %u >= num ds %u\n",
135 136 137 138
			__func__, max_stripe_index, num);
		goto out_err_free_stripe_indices;
	}

139 140
	dsaddr = kzalloc(sizeof(*dsaddr) +
			(sizeof(struct nfs4_pnfs_ds *) * (num - 1)),
141
			gfp_flags);
142
	if (!dsaddr)
143
		goto out_err_free_stripe_indices;
144 145

	dsaddr->stripe_count = cnt;
146 147
	dsaddr->stripe_indices = stripe_indices;
	stripe_indices = NULL;
148
	dsaddr->ds_num = num;
149
	nfs4_init_deviceid_node(&dsaddr->id_node, server, &pdev->dev_id);
150

151 152
	INIT_LIST_HEAD(&dsaddrs);

153 154
	for (i = 0; i < dsaddr->ds_num; i++) {
		int j;
155 156 157 158 159
		u32 mp_count;

		p = xdr_inline_decode(&stream, 4);
		if (unlikely(!p))
			goto out_err_free_deviceid;
160

161 162
		mp_count = be32_to_cpup(p); /* multipath count */
		for (j = 0; j < mp_count; j++) {
163 164
			da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net,
						    &stream, gfp_flags);
165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185
			if (da)
				list_add_tail(&da->da_node, &dsaddrs);
		}
		if (list_empty(&dsaddrs)) {
			dprintk("%s: no suitable DS addresses found\n",
				__func__);
			goto out_err_free_deviceid;
		}

		dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags);
		if (!dsaddr->ds_list[i])
			goto out_err_drain_dsaddrs;

		/* If DS was already in cache, free ds addrs */
		while (!list_empty(&dsaddrs)) {
			da = list_first_entry(&dsaddrs,
					      struct nfs4_pnfs_ds_addr,
					      da_node);
			list_del_init(&da->da_node);
			kfree(da->da_remotestr);
			kfree(da);
186 187
		}
	}
188 189

	__free_page(scratch);
190 191
	return dsaddr;

192 193 194 195 196 197 198 199
out_err_drain_dsaddrs:
	while (!list_empty(&dsaddrs)) {
		da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr,
				      da_node);
		list_del_init(&da->da_node);
		kfree(da->da_remotestr);
		kfree(da);
	}
200
out_err_free_deviceid:
201
	nfs4_fl_free_deviceid(dsaddr);
202 203 204 205 206 207
	/* stripe_indicies was part of dsaddr */
	goto out_err_free_scratch;
out_err_free_stripe_indices:
	kfree(stripe_indices);
out_err_free_scratch:
	__free_page(scratch);
208 209 210 211 212
out_err:
	dprintk("%s ERROR: returning NULL\n", __func__);
	return NULL;
}

213 214
void
nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
215
{
216
	nfs4_put_deviceid_node(&dsaddr->id_node);
217
}
F
Fred Isaman 已提交
218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259

/*
 * Map a file offset to its stripe slot j:
 *   res = (offset - layout->pattern_offset) / layout->stripe_unit
 *   j   = (res + first_stripe_index) % dsaddr->stripe_count
 */
u32
nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset)
{
	struct nfs4_filelayout_segment *seg = FILELAYOUT_LSEG(lseg);
	u64 stripe = offset - seg->pattern_offset;
	u32 slot;

	/* do_div() leaves the quotient in its first argument and returns the remainder */
	do_div(stripe, seg->stripe_unit);
	stripe += seg->first_stripe_index;
	slot = do_div(stripe, seg->dsaddr->stripe_count);

	return slot;
}

/* Return the entry of the device's stripe index table at position @j. */
u32
nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j)
{
	struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr;

	return dsaddr->stripe_indices[j];
}

/*
 * Pick the filehandle to use for stripe slot @j.
 *
 * For STRIPE_SPARSE layouts the choice depends on num_fh: none yields NULL,
 * a single filehandle is always used, otherwise the array is indexed by the
 * data server index for @j.  For every other stripe type the array is
 * indexed by @j directly.
 */
struct nfs_fh *
nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
{
	struct nfs4_filelayout_segment *seg = FILELAYOUT_LSEG(lseg);

	if (seg->stripe_type != STRIPE_SPARSE)
		return seg->fh_array[j];

	switch (seg->num_fh) {
	case 0:
		/* Use the MDS OPEN fh set in nfs_read_rpcsetup */
		return NULL;
	case 1:
		return seg->fh_array[0];
	default:
		return seg->fh_array[nfs4_fl_calc_ds_index(lseg, j)];
	}
}

260
/* Upon return, either ds is connected, or ds is NULL */
F
Fred Isaman 已提交
261 262 263 264 265
struct nfs4_pnfs_ds *
nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
{
	struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr;
	struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx];
266
	struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
267
	struct nfs4_pnfs_ds *ret = ds;
268
	struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);
269
	int status;
F
Fred Isaman 已提交
270 271

	if (ds == NULL) {
272
		printk(KERN_ERR "NFS: %s: No data server for offset index %d\n",
F
Fred Isaman 已提交
273
			__func__, ds_idx);
274
		pnfs_generic_mark_devid_invalid(devid);
275
		goto out;
F
Fred Isaman 已提交
276
	}
277
	smp_rmb();
278
	if (ds->ds_clp)
279
		goto out_test_devid;
F
Fred Isaman 已提交
280

281
	status = nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo,
282
			     dataserver_retrans, 4,
283
			     s->nfs_client->cl_minorversion);
284
	if (status) {
285
		nfs4_mark_deviceid_unavailable(devid);
286 287 288
		ret = NULL;
		goto out;
	}
289

290
out_test_devid:
291 292
	if (ret->ds_clp == NULL ||
	    filelayout_test_devid_unavailable(devid))
293 294 295
		ret = NULL;
out:
	return ret;
F
Fred Isaman 已提交
296
}
297 298 299 300 301 302 303 304 305

/*
 * Module parameters for the data server connection settings.  Both are
 * registered as unsigned integers with mode 0644 and are read by
 * nfs4_fl_prepare_ds() when it calls nfs4_pnfs_ds_connect().
 */
module_param(dataserver_retrans, uint, 0644);
MODULE_PARM_DESC(dataserver_retrans, "The  number of times the NFSv4.1 client "
			"retries a request before it attempts further "
			" recovery  action.");
module_param(dataserver_timeo, uint, 0644);
MODULE_PARM_DESC(dataserver_timeo, "The time (in tenths of a second) the "
			"NFSv4.1  client  waits for a response from a "
			" data server before it retries an NFS request.");