nfs4state.c 165.5 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
/*
*  Copyright (c) 2001 The Regents of the University of Michigan.
*  All rights reserved.
*
*  Kendrick Smith <kmsmith@umich.edu>
*  Andy Adamson <kandros@umich.edu>
*
*  Redistribution and use in source and binary forms, with or without
*  modification, are permitted provided that the following conditions
*  are met:
*
*  1. Redistributions of source code must retain the above copyright
*     notice, this list of conditions and the following disclaimer.
*  2. Redistributions in binary form must reproduce the above copyright
*     notice, this list of conditions and the following disclaimer in the
*     documentation and/or other materials provided with the distribution.
*  3. Neither the name of the University nor the names of its
*     contributors may be used to endorse or promote products derived
*     from this software without specific prior written permission.
*
*  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
*  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
*  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
*  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
*  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
*  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
*  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
*  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
*  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
*  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
*  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/

35
#include <linux/file.h>
36
#include <linux/fs.h>
37
#include <linux/slab.h>
38
#include <linux/namei.h>
39
#include <linux/swap.h>
40
#include <linux/pagemap.h>
41
#include <linux/ratelimit.h>
42
#include <linux/sunrpc/svcauth_gss.h>
43
#include <linux/sunrpc/addr.h>
44
#include <linux/hash.h>
45
#include "xdr4.h"
46
#include "xdr4cb.h"
47
#include "vfs.h"
48
#include "current_stateid.h"
L
Linus Torvalds 已提交
49

50 51
#include "netns.h"

L
Linus Torvalds 已提交
52 53
#define NFSDDBG_FACILITY                NFSDDBG_PROC

54 55 56 57 58 59 60 61
/*
 * Special, well-known stateid values (RFC 5661, section 8.2.3):
 * the all-zero and all-ones stateids, plus the "current stateid"
 * special value introduced in NFSv4.1.
 */
#define all_ones {{~0,~0},~0}
static const stateid_t one_stateid = {
	.si_generation = ~0,
	.si_opaque = all_ones,
};
static const stateid_t zero_stateid = {
	/* all fields zero */
};
static const stateid_t currentstateid = {
	.si_generation = 1,
};

/* Monotonically increasing id used to construct new session ids. */
static u64 current_sessionid = 1;

/* Predicates for recognizing the special stateids above. */
#define ZERO_STATEID(stateid) (!memcmp((stateid), &zero_stateid, sizeof(stateid_t)))
#define ONE_STATEID(stateid)  (!memcmp((stateid), &one_stateid, sizeof(stateid_t)))
#define CURRENT_STATEID(stateid) (!memcmp((stateid), &currentstateid, sizeof(stateid_t)))
L
Linus Torvalds 已提交
71 72

/* forward declarations */
static bool check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner);
static void nfs4_free_ol_stateid(struct nfs4_stid *stid);

/* Locking: */

/*
 * Currently used for the del_recall_lru and file hash table.  In an
 * effort to decrease the scope of the client_mutex, this spinlock may
 * eventually cover more:
 */
static DEFINE_SPINLOCK(state_lock);

/*
 * A waitqueue for all in-progress 4.0 CLOSE operations that are waiting for
 * the refcount on the open stateid to drop.
 */
static DECLARE_WAIT_QUEUE_HEAD(close_wq);

/* Slab caches for the main nfsd4 state objects; created at module init. */
static struct kmem_cache *openowner_slab;
static struct kmem_cache *lockowner_slab;
static struct kmem_cache *file_slab;
static struct kmem_cache *stateid_slab;
static struct kmem_cache *deleg_slab;

static void free_session(struct nfsd4_session *);
98

99
/* Has this session been marked for destruction? */
static bool is_session_dead(struct nfsd4_session *ses)
{
	return ses->se_flags & NFS4_SESSION_DEAD;
}

/*
 * Mark a session dead, unless someone other than the caller still holds a
 * reference to it.  ref_held_by_me is the number of references the caller
 * itself holds (so those don't count as "other users").  Returns
 * nfserr_jukebox if the session is still in use elsewhere.
 */
static __be32 mark_session_dead_locked(struct nfsd4_session *ses, int ref_held_by_me)
{
	if (atomic_read(&ses->se_ref) > ref_held_by_me)
		return nfserr_jukebox;
	ses->se_flags |= NFS4_SESSION_DEAD;
	return nfs_ok;
}

112 113 114 115 116 117 118
/* cl_time == 0 is the "expired" marker set when a client is being torn down. */
static bool is_client_expired(struct nfs4_client *clp)
{
	return clp->cl_time == 0;
}

/*
 * Take a reference on a client, failing with nfserr_expired if it is
 * already being destroyed.  Caller must hold nn->client_lock.
 */
static __be32 get_client_locked(struct nfs4_client *clp)
{
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);

	lockdep_assert_held(&nn->client_lock);

	if (is_client_expired(clp))
		return nfserr_expired;
	atomic_inc(&clp->cl_refcount);
	return nfs_ok;
}

/* must be called under the client_lock */
static inline void
renew_client_locked(struct nfs4_client *clp)
{
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);

	if (is_client_expired(clp)) {
		/* Renewing an expired client is a bug in the caller. */
		WARN_ON(1);
		printk("%s: client (clientid %08x/%08x) already expired\n",
			__func__,
			clp->cl_clientid.cl_boot,
			clp->cl_clientid.cl_id);
		return;
	}

	dprintk("renewing client (clientid %08x/%08x)\n",
			clp->cl_clientid.cl_boot,
			clp->cl_clientid.cl_id);
	/* Move to the tail of the LRU and restart the lease clock. */
	list_move_tail(&clp->cl_lru, &nn->client_lru);
	clp->cl_time = get_seconds();
}

/* Lock-taking wrapper around renew_client_locked(). */
static inline void
renew_client(struct nfs4_client *clp)
{
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);

	spin_lock(&nn->client_lock);
	renew_client_locked(clp);
	spin_unlock(&nn->client_lock);
}

161
/*
 * Drop a reference taken by get_client_locked(); when the last reference
 * goes away, renew the still-live client so its lease timer restarts.
 * Caller must hold nn->client_lock.
 */
static void put_client_renew_locked(struct nfs4_client *clp)
{
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);

	lockdep_assert_held(&nn->client_lock);

	if (!atomic_dec_and_test(&clp->cl_refcount))
		return;
	if (!is_client_expired(clp))
		renew_client_locked(clp);
}

/* As put_client_renew_locked(), but takes client_lock only on the 1->0 drop. */
static void put_client_renew(struct nfs4_client *clp)
{
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);

	if (!atomic_dec_and_lock(&clp->cl_refcount, &nn->client_lock))
		return;
	if (!is_client_expired(clp))
		renew_client_locked(clp);
	spin_unlock(&nn->client_lock);
}

184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199
/*
 * Take a reference on a session (and its owning client).  Fails with
 * nfserr_badsession if the session is already marked dead, or with the
 * client-lookup error if the client is expiring.  Caller holds client_lock.
 */
static __be32 nfsd4_get_session_locked(struct nfsd4_session *ses)
{
	__be32 status;

	if (is_session_dead(ses))
		return nfserr_badsession;
	status = get_client_locked(ses->se_client);
	if (status)
		return status;
	atomic_inc(&ses->se_ref);
	return nfs_ok;
}

/*
 * Drop a session reference; frees the session on the last put if it has
 * been marked dead.  Also drops the paired client reference.
 */
static void nfsd4_put_session_locked(struct nfsd4_session *ses)
{
	struct nfs4_client *clp = ses->se_client;
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);

	lockdep_assert_held(&nn->client_lock);

	if (atomic_dec_and_test(&ses->se_ref) && is_session_dead(ses))
		free_session(ses);
	put_client_renew_locked(clp);
}

/* Lock-taking wrapper around nfsd4_put_session_locked(). */
static void nfsd4_put_session(struct nfsd4_session *ses)
{
	struct nfs4_client *clp = ses->se_client;
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);

	spin_lock(&nn->client_lock);
	nfsd4_put_session_locked(ses);
	spin_unlock(&nn->client_lock);
}

219
/* Compare a stateowner against a client-supplied opaque owner string. */
static int
same_owner_str(struct nfs4_stateowner *sop, struct xdr_netobj *owner)
{
	return (sop->so_owner.len == owner->len) &&
		0 == memcmp(sop->so_owner.data, owner->data, owner->len);
}

/*
 * Look up an openowner by the owner string in an OPEN request, taking a
 * reference on it.  Returns NULL if no matching open owner is hashed.
 * Caller must hold clp->cl_lock.
 */
static struct nfs4_openowner *
find_openstateowner_str_locked(unsigned int hashval, struct nfsd4_open *open,
			struct nfs4_client *clp)
{
	struct nfs4_stateowner *so;

	lockdep_assert_held(&clp->cl_lock);

	list_for_each_entry(so, &clp->cl_ownerstr_hashtbl[hashval],
			    so_strhash) {
		/* the owner string hash table also holds lockowners */
		if (!so->so_is_open_owner)
			continue;
		if (same_owner_str(so, &open->op_owner)) {
			atomic_inc(&so->so_count);
			return openowner(so);
		}
	}
	return NULL;
}

/* Lock-taking wrapper around find_openstateowner_str_locked(). */
static struct nfs4_openowner *
find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open,
			struct nfs4_client *clp)
{
	struct nfs4_openowner *oo;

	spin_lock(&clp->cl_lock);
	oo = find_openstateowner_str_locked(hashval, open, clp);
	spin_unlock(&clp->cl_lock);
	return oo;
}

L
Linus Torvalds 已提交
258 259 260 261 262 263 264 265 266 267 268 269 270
/*
 * Simple multiplicative hash over an opaque byte string (used for owner
 * strings and client name strings).
 *
 * Fix: the cursor used to be declared `unsigned char *` with a cast that
 * silently discarded the const qualifier of @ptr; keep it const-clean.
 */
static inline u32
opaque_hashval(const void *ptr, int nbytes)
{
	const unsigned char *cptr = ptr;

	u32 x = 0;
	while (nbytes--) {
		x *= 37;
		x += *cptr++;
	}
	return x;
}

271 272 273 274 275
/* Return an nfs4_file to its slab cache. */
static void nfsd4_free_file(struct nfs4_file *f)
{
	kmem_cache_free(file_slab, f);
}

/*
 * Drop a reference on an nfs4_file; on the last put, unhash it from
 * file_hashtbl (under state_lock) and free it.
 */
static inline void
put_nfs4_file(struct nfs4_file *fi)
{
	might_lock(&state_lock);

	if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) {
		hlist_del(&fi->fi_hash);
		spin_unlock(&state_lock);
		nfsd4_free_file(fi);
	}
}

/* Take a reference on an nfs4_file. */
static inline void
get_nfs4_file(struct nfs4_file *fi)
{
	atomic_inc(&fi->fi_ref);
}

294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366
/*
 * Return a new reference to the struct file cached for open mode @oflag,
 * or NULL if none is cached.
 */
static struct file *
__nfs4_get_fd(struct nfs4_file *f, int oflag)
{
	struct file *filp = f->fi_fds[oflag];

	return filp ? get_file(filp) : NULL;
}

/*
 * Find a file suitable for writing: prefer the write-only fd, fall back
 * to the read/write one.  Caller must hold f->fi_lock.
 */
static struct file *
find_writeable_file_locked(struct nfs4_file *f)
{
	struct file *filp;

	lockdep_assert_held(&f->fi_lock);

	filp = __nfs4_get_fd(f, O_WRONLY);
	return filp ? filp : __nfs4_get_fd(f, O_RDWR);
}

/* Lock-taking wrapper around find_writeable_file_locked(). */
static struct file *
find_writeable_file(struct nfs4_file *f)
{
	struct file *filp;

	spin_lock(&f->fi_lock);
	filp = find_writeable_file_locked(f);
	spin_unlock(&f->fi_lock);

	return filp;
}

/*
 * Find a file suitable for reading: prefer the read-only fd, fall back
 * to the read/write one.  Caller must hold f->fi_lock.
 */
static struct file *find_readable_file_locked(struct nfs4_file *f)
{
	struct file *filp;

	lockdep_assert_held(&f->fi_lock);

	filp = __nfs4_get_fd(f, O_RDONLY);
	return filp ? filp : __nfs4_get_fd(f, O_RDWR);
}

/* Lock-taking wrapper around find_readable_file_locked(). */
static struct file *
find_readable_file(struct nfs4_file *f)
{
	struct file *filp;

	spin_lock(&f->fi_lock);
	filp = find_readable_file_locked(f);
	spin_unlock(&f->fi_lock);

	return filp;
}

/* Return any cached open file, preferring RDWR, then WRONLY, then RDONLY. */
static struct file *
find_any_file(struct nfs4_file *f)
{
	struct file *filp;

	spin_lock(&f->fi_lock);
	filp = __nfs4_get_fd(f, O_RDWR);
	if (!filp)
		filp = __nfs4_get_fd(f, O_WRONLY);
	if (!filp)
		filp = __nfs4_get_fd(f, O_RDONLY);
	spin_unlock(&f->fi_lock);
	return filp;
}

367
/* Count of outstanding delegations, bounded by max_delegations. */
static atomic_long_t num_delegations;
unsigned long max_delegations;

/*
 * Open owner state (share locks)
 */

/* hash tables for lock and open owners */
#define OWNER_HASH_BITS              8
#define OWNER_HASH_SIZE             (1 << OWNER_HASH_BITS)
#define OWNER_HASH_MASK             (OWNER_HASH_SIZE - 1)

/* Hash an owner string into a cl_ownerstr_hashtbl bucket index. */
static unsigned int ownerstr_hashval(struct xdr_netobj *ownername)
{
	unsigned int ret;

	ret = opaque_hashval(ownername->data, ownername->len);
	return ret & OWNER_HASH_MASK;
}
386 387 388 389

/* hash table for nfs4_file */
#define FILE_HASH_BITS                   8
#define FILE_HASH_SIZE                  (1 << FILE_HASH_BITS)

/* 32-bit hash of a file handle's opaque data. */
static unsigned int nfsd_fh_hashval(struct knfsd_fh *fh)
{
	return jhash2(fh->fh_base.fh_pad, XDR_QUADLEN(fh->fh_size), 0);
}

/* Reduce the file handle hash to a file_hashtbl bucket index. */
static unsigned int file_hashval(struct knfsd_fh *fh)
{
	return nfsd_fh_hashval(fh) & (FILE_HASH_SIZE - 1);
}

/* Do two file handles refer to the same file (byte-wise comparison)? */
static bool nfsd_fh_match(struct knfsd_fh *fh1, struct knfsd_fh *fh2)
{
	return fh1->fh_size == fh2->fh_size &&
		!memcmp(fh1->fh_base.fh_pad,
				fh2->fh_base.fh_pad,
				fh1->fh_size);
}

/* Global hash of nfs4_file objects, protected by state_lock. */
static struct hlist_head file_hashtbl[FILE_HASH_SIZE];
410

411 412
/*
 * Bump the per-mode access refcounts on an nfs4_file for the share access
 * bits in @access.  Caller must hold fp->fi_lock.
 */
static void
__nfs4_file_get_access(struct nfs4_file *fp, u32 access)
{
	lockdep_assert_held(&fp->fi_lock);

	if (access & NFS4_SHARE_ACCESS_WRITE)
		atomic_inc(&fp->fi_access[O_WRONLY]);
	if (access & NFS4_SHARE_ACCESS_READ)
		atomic_inc(&fp->fi_access[O_RDONLY]);
}

/*
 * Validate a requested share access mode against the file's current deny
 * mode and, if acceptable, take the access references.  Caller must hold
 * fp->fi_lock.
 */
static __be32
nfs4_file_get_access(struct nfs4_file *fp, u32 access)
{
	lockdep_assert_held(&fp->fi_lock);

	/* Does this access mode make sense? */
	if (access & ~NFS4_SHARE_ACCESS_BOTH)
		return nfserr_inval;

	/* Does it conflict with a deny mode already set? */
	if ((access & fp->fi_share_deny) != 0)
		return nfserr_share_denied;

	__nfs4_file_get_access(fp, access);
	return nfs_ok;
}

439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457
/*
 * Check whether a requested share deny mode is valid and compatible with
 * the accesses already granted on this file.  Returns nfs_ok, nfserr_inval
 * for nonsense deny bits, or nfserr_share_denied on a conflict.
 */
static __be32 nfs4_file_check_deny(struct nfs4_file *fp, u32 deny)
{
	/* Common case is that there is no deny mode. */
	if (!deny)
		return nfs_ok;

	/* Does this deny mode make sense? */
	if (deny & ~NFS4_SHARE_DENY_BOTH)
		return nfserr_inval;

	/* Denying read while someone holds read access conflicts. */
	if ((deny & NFS4_SHARE_DENY_READ) &&
	    atomic_read(&fp->fi_access[O_RDONLY]))
		return nfserr_share_denied;

	/* Likewise for write. */
	if ((deny & NFS4_SHARE_DENY_WRITE) &&
	    atomic_read(&fp->fi_access[O_WRONLY]))
		return nfserr_share_denied;

	return nfs_ok;
}

458
/*
 * Drop one reference on the given open mode.  When the count for @oflag
 * hits zero, detach (and later fput) the cached struct file for that mode;
 * if the opposite mode is also unused, the O_RDWR file can go too.  The
 * fputs happen after dropping fi_lock since fput may sleep.
 */
static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag)
{
	might_lock(&fp->fi_lock);

	if (atomic_dec_and_lock(&fp->fi_access[oflag], &fp->fi_lock)) {
		struct file *f1 = NULL;
		struct file *f2 = NULL;

		/* swap() detaches the file pointer under the lock */
		swap(f1, fp->fi_fds[oflag]);
		if (atomic_read(&fp->fi_access[1 - oflag]) == 0)
			swap(f2, fp->fi_fds[O_RDWR]);
		spin_unlock(&fp->fi_lock);
		if (f1)
			fput(f1);
		if (f2)
			fput(f2);
	}
}

/* Drop the access references corresponding to the share access bits. */
static void nfs4_file_put_access(struct nfs4_file *fp, u32 access)
{
	WARN_ON_ONCE(access & ~NFS4_SHARE_ACCESS_BOTH);

	if (access & NFS4_SHARE_ACCESS_WRITE)
		__nfs4_file_put_access(fp, O_WRONLY);
	if (access & NFS4_SHARE_ACCESS_READ)
		__nfs4_file_put_access(fp, O_RDONLY);
}

487 488
/*
 * Allocate a generic stateid from @slab and register it in the client's
 * stateid idr, assigning it a fresh opaque id.  Returns NULL on allocation
 * or idr failure.  The new stid starts with one reference held.
 */
static struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl,
					 struct kmem_cache *slab)
{
	struct nfs4_stid *stid;
	int new_id;

	stid = kmem_cache_zalloc(slab, GFP_KERNEL);
	if (!stid)
		return NULL;

	/* preload so the idr insert under cl_lock cannot need to sleep */
	idr_preload(GFP_KERNEL);
	spin_lock(&cl->cl_lock);
	new_id = idr_alloc_cyclic(&cl->cl_stateids, stid, 0, 0, GFP_NOWAIT);
	spin_unlock(&cl->cl_lock);
	idr_preload_end();
	if (new_id < 0)
		goto out_free;
	stid->sc_client = cl;
	stid->sc_stateid.si_opaque.so_id = new_id;
	stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid;
	/* Will be incremented before return to client: */
	atomic_set(&stid->sc_count, 1);

	/*
	 * It shouldn't be a problem to reuse an opaque stateid value.
	 * I don't think it is for 4.1.  But with 4.0 I worry that, for
	 * example, a stray write retransmission could be accepted by
	 * the server when it should have been rejected.  Therefore,
	 * adopt a trick from the sctp code to attempt to maximize the
	 * amount of time until an id is reused, by ensuring they always
	 * "increase" (mod INT_MAX):
	 */
	return stid;
out_free:
	kmem_cache_free(slab, stid);
	return NULL;
}

525
/* Allocate an open/lock stateid with the appropriate destructor hooked up. */
static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp)
{
	struct nfs4_stid *stid;
	struct nfs4_ol_stateid *stp;

	stid = nfs4_alloc_stid(clp, stateid_slab);
	if (!stid)
		return NULL;

	stp = openlockstateid(stid);
	stp->st_stid.sc_free = nfs4_free_ol_stateid;
	return stp;
}

/* Destructor for delegation stateids; also drops the global count. */
static void nfs4_free_deleg(struct nfs4_stid *stid)
{
	kmem_cache_free(deleg_slab, stid);
	atomic_long_dec(&num_delegations);
}

545 546 547 548 549 550 551 552 553 554 555 556 557 558
/*
 * When we recall a delegation, we should be careful not to hand it
 * out again straight away.
 * To ensure this we keep a pair of bloom filters ('new' and 'old')
 * in which the filehandles of recalled delegations are "stored".
 * If a filehandle appear in either filter, a delegation is blocked.
 * When a delegation is recalled, the filehandle is stored in the "new"
 * filter.
 * Every 30 seconds we swap the filters and clear the "new" one,
 * unless both are empty of course.
 *
 * Each filter is 256 bits.  We hash the filehandle to 32bit and use the
 * low 3 bytes as hash-table indices.
 *
 * 'blocked_delegations_lock', which is always taken in block_delegations(),
 * is used to manage concurrent access.  Testing does not need the lock
 * except when swapping the two filters.
 */
static DEFINE_SPINLOCK(blocked_delegations_lock);
static struct bloom_pair {
	int	entries, old_entries;	/* entries in new/old filters */
	time_t	swap_time;		/* last time the filters were swapped */
	int	new; /* index into 'set' */
	DECLARE_BITMAP(set[2], 256);
} blocked_delegations;

/* Is a delegation for this filehandle currently blocked? */
static int delegation_blocked(struct knfsd_fh *fh)
{
	u32 hash;
	struct bloom_pair *bd = &blocked_delegations;

	if (bd->entries == 0)
		return 0;
	if (seconds_since_boot() - bd->swap_time > 30) {
		spin_lock(&blocked_delegations_lock);
		/* re-check under the lock before swapping */
		if (seconds_since_boot() - bd->swap_time > 30) {
			bd->entries -= bd->old_entries;
			bd->old_entries = bd->entries;
			memset(bd->set[bd->new], 0,
			       sizeof(bd->set[0]));
			bd->new = 1-bd->new;
			bd->swap_time = seconds_since_boot();
		}
		spin_unlock(&blocked_delegations_lock);
	}
	hash = arch_fast_hash(&fh->fh_base, fh->fh_size, 0);
	/* blocked if all three index bits are set in either filter */
	if (test_bit(hash&255, bd->set[0]) &&
	    test_bit((hash>>8)&255, bd->set[0]) &&
	    test_bit((hash>>16)&255, bd->set[0]))
		return 1;

	if (test_bit(hash&255, bd->set[1]) &&
	    test_bit((hash>>8)&255, bd->set[1]) &&
	    test_bit((hash>>16)&255, bd->set[1]))
		return 1;

	return 0;
}

/* Record a recalled filehandle in the "new" bloom filter. */
static void block_delegations(struct knfsd_fh *fh)
{
	u32 hash;
	struct bloom_pair *bd = &blocked_delegations;

	hash = arch_fast_hash(&fh->fh_base, fh->fh_size, 0);

	spin_lock(&blocked_delegations_lock);
	__set_bit(hash&255, bd->set[bd->new]);
	__set_bit((hash>>8)&255, bd->set[bd->new]);
	__set_bit((hash>>16)&255, bd->set[bd->new]);
	if (bd->entries == 0)
		bd->swap_time = seconds_since_boot();
	bd->entries += 1;
	spin_unlock(&blocked_delegations_lock);
}

L
Linus Torvalds 已提交
621
/*
 * Allocate and initialize a (read) delegation stateid for @clp on the file
 * named by @current_fh.  Fails (returning NULL) if the global delegation
 * limit is hit or the filehandle is currently blocked by a recent recall.
 */
static struct nfs4_delegation *
alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh)
{
	struct nfs4_delegation *dp;
	long n;

	dprintk("NFSD alloc_init_deleg\n");
	n = atomic_long_inc_return(&num_delegations);
	if (n < 0 || n > max_delegations)
		goto out_dec;
	if (delegation_blocked(&current_fh->fh_handle))
		goto out_dec;
	dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab));
	if (dp == NULL)
		goto out_dec;

	dp->dl_stid.sc_free = nfs4_free_deleg;
	/*
	 * delegation seqid's are never incremented.  The 4.1 special
	 * meaning of seqid 0 isn't meaningful, really, but let's avoid
	 * 0 anyway just for consistency and use 1:
	 */
	dp->dl_stid.sc_stateid.si_generation = 1;
	INIT_LIST_HEAD(&dp->dl_perfile);
	INIT_LIST_HEAD(&dp->dl_perclnt);
	INIT_LIST_HEAD(&dp->dl_recall_lru);
	dp->dl_type = NFS4_OPEN_DELEGATE_READ;
	INIT_WORK(&dp->dl_recall.cb_work, nfsd4_run_cb_recall);
	return dp;
out_dec:
	atomic_long_dec(&num_delegations);
	return NULL;
}

/*
 * Drop a reference on a generic stateid.  On the final put, remove it from
 * the client's idr, run its type-specific destructor, and release the file
 * reference.  Non-final puts wake close_wq waiters, since NFSv4.0 CLOSE
 * waits for the stateid refcount to drop.
 */
void
nfs4_put_stid(struct nfs4_stid *s)
{
	struct nfs4_file *fp = s->sc_file;
	struct nfs4_client *clp = s->sc_client;

	might_lock(&clp->cl_lock);

	if (!atomic_dec_and_lock(&s->sc_count, &clp->cl_lock)) {
		wake_up_all(&close_wq);
		return;
	}
	idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
	spin_unlock(&clp->cl_lock);
	s->sc_free(s);
	if (fp)
		put_nfs4_file(fp);
}

674
/*
 * Drop one delegation's reference on the file's lease; the last delegatee
 * releases the vfs lease and the cached struct file.  Caller must hold
 * state_lock.
 */
static void nfs4_put_deleg_lease(struct nfs4_file *fp)
{
	lockdep_assert_held(&state_lock);

	if (!fp->fi_lease)
		return;
	if (atomic_dec_and_test(&fp->fi_delegees)) {
		vfs_setlease(fp->fi_deleg_file, F_UNLCK, &fp->fi_lease);
		fp->fi_lease = NULL;
		fput(fp->fi_deleg_file);
		fp->fi_deleg_file = NULL;
	}
}

J
J. Bruce Fields 已提交
688 689
/* Clear the stateid's type so later lookups no longer find it. */
static void unhash_stid(struct nfs4_stid *s)
{
	s->sc_type = 0;
}

693 694 695
/*
 * Link a delegation onto its file's and client's delegation lists, taking
 * a reference for the lists.  Caller must hold both state_lock and
 * fp->fi_lock.
 */
static void
hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
{
	lockdep_assert_held(&state_lock);
	lockdep_assert_held(&fp->fi_lock);

	atomic_inc(&dp->dl_stid.sc_count);
	dp->dl_stid.sc_type = NFS4_DELEG_STID;
	list_add(&dp->dl_perfile, &fp->fi_delegations);
	list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations);
}

L
Linus Torvalds 已提交
705
/*
 * Detach a delegation from its file and client lists and drop the lease
 * reference.  Caller must hold state_lock; the caller still owns the
 * list-reference and must eventually nfs4_put_stid() it.
 */
static void
unhash_delegation_locked(struct nfs4_delegation *dp)
{
	struct nfs4_file *fp = dp->dl_stid.sc_file;

	lockdep_assert_held(&state_lock);

	dp->dl_stid.sc_type = NFS4_CLOSED_DELEG_STID;
	/* Ensure that deleg break won't try to requeue it */
	++dp->dl_time;
	spin_lock(&fp->fi_lock);
	list_del_init(&dp->dl_perclnt);
	list_del_init(&dp->dl_recall_lru);
	list_del_init(&dp->dl_perfile);
	spin_unlock(&fp->fi_lock);
	/*
	 * NOTE(review): fp was already dereferenced above, so this NULL
	 * check can never be false here — looks vestigial; confirm.
	 */
	if (fp)
		nfs4_put_deleg_lease(fp);
}

/* Unhash a delegation and drop the caller's reference to it. */
static void destroy_delegation(struct nfs4_delegation *dp)
{
	spin_lock(&state_lock);
	unhash_delegation_locked(dp);
	spin_unlock(&state_lock);
	nfs4_put_stid(&dp->dl_stid);
}

/*
 * Handle a delegation whose recall timed out: for 4.0 clients just drop
 * it; for 4.1+ keep it on cl_revoked so the client can be told it was
 * revoked (via SEQ4_STATUS flags / TEST_STATEID).
 */
static void revoke_delegation(struct nfs4_delegation *dp)
{
	struct nfs4_client *clp = dp->dl_stid.sc_client;

	WARN_ON(!list_empty(&dp->dl_recall_lru));

	if (clp->cl_minorversion == 0)
		nfs4_put_stid(&dp->dl_stid);
	else {
		dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID;
		spin_lock(&clp->cl_lock);
		list_add(&dp->dl_recall_lru, &clp->cl_revoked);
		spin_unlock(&clp->cl_lock);
	}
}

L
Linus Torvalds 已提交
748 749 750 751
/* 
 * SETCLIENTID state 
 */

/* Hash a clientid into a client hash table bucket. */
static unsigned int clientid_hashval(u32 id)
{
	return id & CLIENT_HASH_MASK;
}

/* Hash the first 8 bytes of a client name string into a bucket. */
static unsigned int clientstr_hashval(const char *name)
{
	return opaque_hashval(name, 8) & CLIENT_HASH_MASK;
}

762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779
/*
 * We store the NONE, READ, WRITE, and BOTH bits separately in the
 * st_{access,deny}_bmap field of the stateid, in order to track not
 * only what share bits are currently in force, but also what
 * combinations of share bits previous opens have used.  This allows us
 * to enforce the recommendation of rfc 3530 14.2.19 that the server
 * return an error if the client attempt to downgrade to a combination
 * of share bits not explicable by closing some of its previous opens.
 *
 * XXX: This enforcement is actually incomplete, since we don't keep
 * track of access/deny bit combinations; so, e.g., we allow:
 *
 *	OPEN allow read, deny write
 *	OPEN allow both, deny none
 *	DOWNGRADE allow read, deny none
 *
 * which we should reject.
 */
/* Collapse a stateid's share bitmap back into a single share mode value. */
static unsigned int
bmap_to_share_mode(unsigned long bmap) {
	int i;
	unsigned int access = 0;

	for (i = 1; i < 4; i++) {
		if (test_bit(i, &bmap))
			access |= i;
	}
	return access;
}

792 793 794 795
/* set share access for a given stateid */
static inline void
set_access(u32 access, struct nfs4_ol_stateid *stp)
{
	unsigned char mask = 1 << access;

	WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH);
	stp->st_access_bmap |= mask;
}

/* clear share access for a given stateid */
static inline void
clear_access(u32 access, struct nfs4_ol_stateid *stp)
{
	unsigned char mask = 1 << access;

	WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH);
	stp->st_access_bmap &= ~mask;
}

/* test whether a given stateid has access */
static inline bool
test_access(u32 access, struct nfs4_ol_stateid *stp)
{
	unsigned char mask = 1 << access;

	return (bool)(stp->st_access_bmap & mask);
}

/* set share deny for a given stateid */
static inline void
set_deny(u32 deny, struct nfs4_ol_stateid *stp)
{
	unsigned char mask = 1 << deny;

	WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH);
	stp->st_deny_bmap |= mask;
}

/* clear share deny for a given stateid */
static inline void
clear_deny(u32 deny, struct nfs4_ol_stateid *stp)
{
	unsigned char mask = 1 << deny;

	WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH);
	stp->st_deny_bmap &= ~mask;
}

/* test whether a given stateid is denying specific access */
static inline bool
test_deny(u32 deny, struct nfs4_ol_stateid *stp)
{
	unsigned char mask = 1 << deny;

	return (bool)(stp->st_deny_bmap & mask);
}

/* Map NFSv4 share access bits onto an open(2)-style access mode. */
static int nfs4_access_to_omode(u32 access)
{
	switch (access & NFS4_SHARE_ACCESS_BOTH) {
	case NFS4_SHARE_ACCESS_READ:
		return O_RDONLY;
	case NFS4_SHARE_ACCESS_WRITE:
		return O_WRONLY;
	case NFS4_SHARE_ACCESS_BOTH:
		return O_RDWR;
	}
	/* access==0 is not expected here */
	WARN_ON_ONCE(1);
	return O_RDONLY;
}

864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894
/*
 * A stateid that had a deny mode associated with it is being released
 * or downgraded. Recalculate the deny mode on the file.
 */
static void
recalculate_deny_mode(struct nfs4_file *fp)
{
	struct nfs4_ol_stateid *stp;

	spin_lock(&fp->fi_lock);
	fp->fi_share_deny = 0;
	list_for_each_entry(stp, &fp->fi_stateids, st_perfile)
		fp->fi_share_deny |= bmap_to_share_mode(stp->st_deny_bmap);
	spin_unlock(&fp->fi_lock);
}

/* Clear every deny bit of @stp not covered by the new @deny mode. */
static void
reset_union_bmap_deny(u32 deny, struct nfs4_ol_stateid *stp)
{
	int i;
	bool change = false;

	for (i = 1; i < 4; i++) {
		if ((i & deny) != i) {
			change = true;
			clear_deny(i, stp);
		}
	}

	/* Recalculate per-file deny mode if there was a change */
	if (change)
		recalculate_deny_mode(stp->st_stid.sc_file);
}

898 899 900 901 902
/* release all access and file references for a given stateid */
static void
release_all_access(struct nfs4_ol_stateid *stp)
{
	int i;
	struct nfs4_file *fp = stp->st_stid.sc_file;

	/* this stateid's deny bits no longer count toward the file's mode */
	if (fp && stp->st_deny_bmap != 0)
		recalculate_deny_mode(fp);

	for (i = 1; i < 4; i++) {
		if (test_access(i, stp))
			nfs4_file_put_access(stp->st_stid.sc_file, i);
		clear_access(i, stp);
	}
}

915 916
/*
 * Drop a reference on a stateowner; on the final put, unhash it via its
 * type-specific op (under cl_lock) and free it.
 */
static void nfs4_put_stateowner(struct nfs4_stateowner *sop)
{
	struct nfs4_client *clp = sop->so_client;

	might_lock(&clp->cl_lock);

	if (!atomic_dec_and_lock(&sop->so_count, &clp->cl_lock))
		return;
	sop->so_ops->so_unhash(sop);
	spin_unlock(&clp->cl_lock);
	kfree(sop->so_owner.data);
	sop->so_ops->so_free(sop);
}

929
/*
 * Detach an open/lock stateid from its file's and stateowner's lists.
 * Caller must hold the owning client's cl_lock.
 */
static void unhash_ol_stateid(struct nfs4_ol_stateid *stp)
{
	struct nfs4_file *fp = stp->st_stid.sc_file;

	lockdep_assert_held(&stp->st_stateowner->so_client->cl_lock);

	spin_lock(&fp->fi_lock);
	list_del(&stp->st_perfile);
	spin_unlock(&fp->fi_lock);
	list_del(&stp->st_perstateowner);
}

941
/*
 * Destructor for open/lock stateids: release access/file references and
 * the stateowner reference, then free the stateid itself.
 */
static void nfs4_free_ol_stateid(struct nfs4_stid *stid)
{
	struct nfs4_ol_stateid *stp = openlockstateid(stid);

	release_all_access(stp);
	if (stp->st_stateowner)
		nfs4_put_stateowner(stp->st_stateowner);
	kmem_cache_free(stateid_slab, stid);
}

/*
 * Destructor for lock stateids: additionally close the file on behalf of
 * the lockowner (releasing any posix locks it still holds) before the
 * generic open/lock stateid teardown.
 */
static void nfs4_free_lock_stateid(struct nfs4_stid *stid)
{
	struct nfs4_ol_stateid *stp = openlockstateid(stid);
	struct nfs4_lockowner *lo = lockowner(stp->st_stateowner);
	struct file *file;

	file = find_any_file(stp->st_stid.sc_file);
	if (file)
		filp_close(file, (fl_owner_t)lo);
	nfs4_free_ol_stateid(stid);
}

963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986
/*
 * Put the persistent reference to an already unhashed generic stateid, while
 * holding the cl_lock. If it's the last reference, then put it onto the
 * reaplist for later destruction.
 */
static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp,
				       struct list_head *reaplist)
{
	struct nfs4_stid *s = &stp->st_stid;
	struct nfs4_client *clp = s->sc_client;

	lockdep_assert_held(&clp->cl_lock);

	WARN_ON_ONCE(!list_empty(&stp->st_locks));

	/* non-final put: wake 4.0 CLOSE waiters and leave it alone */
	if (!atomic_dec_and_test(&s->sc_count)) {
		wake_up_all(&close_wq);
		return;
	}

	idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
	/* st_locks is empty (checked above), so reuse it as the reap link */
	list_add(&stp->st_locks, reaplist);
}

987
/*
 * Detach a lock stateid from its open stateid's st_locks list and from
 * the usual hash/lookup structures.  Caller must hold the client's
 * cl_lock (asserted via the associated openowner).
 */
static void unhash_lock_stateid(struct nfs4_ol_stateid *stp)
{
	struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner);

	lockdep_assert_held(&oo->oo_owner.so_client->cl_lock);

	list_del_init(&stp->st_locks);
	unhash_ol_stateid(stp);
	unhash_stid(&stp->st_stid);
}

/* Unhash a lock stateid under cl_lock and drop the caller's reference. */
static void release_lock_stateid(struct nfs4_ol_stateid *stp)
{
	struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner);

	spin_lock(&oo->oo_owner.so_client->cl_lock);
	unhash_lock_stateid(stp);
	spin_unlock(&oo->oo_owner.so_client->cl_lock);
	nfs4_put_stid(&stp->st_stid);
}

1008
/* Remove a lockowner from the owner-string hash.  Caller holds cl_lock. */
static void unhash_lockowner_locked(struct nfs4_lockowner *lo)
{
	struct nfs4_client *clp = lo->lo_owner.so_client;

	lockdep_assert_held(&clp->cl_lock);

	list_del_init(&lo->lo_owner.so_strhash);
}

1017 1018 1019 1020 1021 1022 1023 1024
/*
 * Free a list of generic stateids that were collected earlier after being
 * fully unhashed.
 */
static void
free_ol_stateid_reaplist(struct list_head *reaplist)
{
	struct nfs4_ol_stateid *stp;
	struct nfs4_file *fp;

	/* destructors (e.g. filp_close) may sleep */
	might_sleep();

	while (!list_empty(reaplist)) {
		stp = list_first_entry(reaplist, struct nfs4_ol_stateid,
				       st_locks);
		list_del(&stp->st_locks);
		fp = stp->st_stid.sc_file;
		stp->st_stid.sc_free(&stp->st_stid);
		if (fp)
			put_nfs4_file(fp);
	}
}

1040
/*
 * Tear down a lockowner: unhash it and all of its lock stateids under
 * cl_lock, then free the collected stateids and drop the owner reference
 * outside the lock.
 */
static void release_lockowner(struct nfs4_lockowner *lo)
{
	struct nfs4_client *clp = lo->lo_owner.so_client;
	struct nfs4_ol_stateid *stp;
	struct list_head reaplist;

	INIT_LIST_HEAD(&reaplist);

	spin_lock(&clp->cl_lock);
	unhash_lockowner_locked(lo);
	while (!list_empty(&lo->lo_owner.so_stateids)) {
		stp = list_first_entry(&lo->lo_owner.so_stateids,
				struct nfs4_ol_stateid, st_perstateowner);
		unhash_lock_stateid(stp);
		put_ol_stateid_locked(stp, &reaplist);
	}
	spin_unlock(&clp->cl_lock);
	free_ol_stateid_reaplist(&reaplist);
	nfs4_put_stateowner(&lo->lo_owner);
}

1061 1062
/*
 * Unhash every lock stateid hanging off an open stateid, collecting them
 * on @reaplist for later freeing.  Caller holds the client's cl_lock.
 */
static void release_open_stateid_locks(struct nfs4_ol_stateid *open_stp,
				       struct list_head *reaplist)
{
	struct nfs4_ol_stateid *stp;

	while (!list_empty(&open_stp->st_locks)) {
		stp = list_entry(open_stp->st_locks.next,
				struct nfs4_ol_stateid, st_locks);
		unhash_lock_stateid(stp);
		put_ol_stateid_locked(stp, reaplist);
	}
}

/* Unhash an open stateid and all of its lock stateids.  Holds cl_lock. */
static void unhash_open_stateid(struct nfs4_ol_stateid *stp,
				struct list_head *reaplist)
{
	lockdep_assert_held(&stp->st_stid.sc_client->cl_lock);

	unhash_ol_stateid(stp);
	release_open_stateid_locks(stp, reaplist);
}

/* Fully release an open stateid: unhash, then free outside cl_lock. */
static void release_open_stateid(struct nfs4_ol_stateid *stp)
{
	LIST_HEAD(reaplist);

	spin_lock(&stp->st_stid.sc_client->cl_lock);
	unhash_open_stateid(stp, &reaplist);
	put_ol_stateid_locked(stp, &reaplist);
	spin_unlock(&stp->st_stid.sc_client->cl_lock);
	free_ol_stateid_reaplist(&reaplist);
}

1094
static void unhash_openowner_locked(struct nfs4_openowner *oo)
1095
{
1096
	struct nfs4_client *clp = oo->oo_owner.so_client;
1097

1098
	lockdep_assert_held(&clp->cl_lock);
1099

1100 1101
	list_del_init(&oo->oo_owner.so_strhash);
	list_del_init(&oo->oo_perclient);
1102 1103
}

1104 1105
static void release_last_closed_stateid(struct nfs4_openowner *oo)
{
1106 1107 1108
	struct nfsd_net *nn = net_generic(oo->oo_owner.so_client->net,
					  nfsd_net_id);
	struct nfs4_ol_stateid *s;
1109

1110 1111
	spin_lock(&nn->client_lock);
	s = oo->oo_last_closed_stid;
1112
	if (s) {
1113
		list_del_init(&oo->oo_close_lru);
1114 1115
		oo->oo_last_closed_stid = NULL;
	}
1116 1117 1118
	spin_unlock(&nn->client_lock);
	if (s)
		nfs4_put_stid(&s->st_stid);
1119 1120
}

1121
static void release_openowner(struct nfs4_openowner *oo)
1122 1123
{
	struct nfs4_ol_stateid *stp;
1124
	struct nfs4_client *clp = oo->oo_owner.so_client;
1125
	struct list_head reaplist;
1126

1127
	INIT_LIST_HEAD(&reaplist);
1128

1129 1130
	spin_lock(&clp->cl_lock);
	unhash_openowner_locked(oo);
1131 1132 1133
	while (!list_empty(&oo->oo_owner.so_stateids)) {
		stp = list_first_entry(&oo->oo_owner.so_stateids,
				struct nfs4_ol_stateid, st_perstateowner);
1134
		unhash_open_stateid(stp, &reaplist);
1135
		put_ol_stateid_locked(stp, &reaplist);
1136
	}
1137
	spin_unlock(&clp->cl_lock);
1138
	free_ol_stateid_reaplist(&reaplist);
1139
	release_last_closed_stateid(oo);
1140
	nfs4_put_stateowner(&oo->oo_owner);
1141 1142
}

M
Marc Eshel 已提交
1143 1144 1145 1146 1147 1148 1149 1150
static inline int
hash_sessionid(struct nfs4_sessionid *sessionid)
{
	struct nfsd4_sessionid *sid = (struct nfsd4_sessionid *)sessionid;

	return sid->sequence % SESSION_HASH_SIZE;
}

1151
#ifdef NFSD_DEBUG
M
Marc Eshel 已提交
1152 1153 1154 1155 1156 1157
static inline void
dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid)
{
	u32 *ptr = (u32 *)(&sessionid->data[0]);
	dprintk("%s: %u:%u:%u:%u\n", fn, ptr[0], ptr[1], ptr[2], ptr[3]);
}
1158 1159 1160 1161 1162 1163 1164
#else
static inline void
dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid)
{
}
#endif

1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176
/*
 * Bump the seqid on cstate->replay_owner, and clear replay_owner if it
 * won't be used for replay.
 */
void nfsd4_bump_seqid(struct nfsd4_compound_state *cstate, __be32 nfserr)
{
	struct nfs4_stateowner *so = cstate->replay_owner;

	if (nfserr == nfserr_replay_me)
		return;

	if (!seqid_mutating_err(ntohl(nfserr))) {
1177
		nfsd4_cstate_clear_replay(cstate);
1178 1179 1180 1181 1182 1183 1184 1185 1186
		return;
	}
	if (!so)
		return;
	if (so->so_is_open_owner)
		release_last_closed_stateid(openowner(so));
	so->so_seqid++;
	return;
}
M
Marc Eshel 已提交
1187

A
Andy Adamson 已提交
1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200
static void
gen_sessionid(struct nfsd4_session *ses)
{
	struct nfs4_client *clp = ses->se_client;
	struct nfsd4_sessionid *sid;

	sid = (struct nfsd4_sessionid *)ses->se_sessionid.data;
	sid->clientid = clp->cl_clientid;
	sid->sequence = current_sessionid++;
	sid->reserved = 0;
}

/*
1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213
 * The protocol defines ca_maxresponssize_cached to include the size of
 * the rpc header, but all we need to cache is the data starting after
 * the end of the initial SEQUENCE operation--the rest we regenerate
 * each time.  Therefore we can advertise a ca_maxresponssize_cached
 * value that is the number of bytes in our cache plus a few additional
 * bytes.  In order to stay on the safe side, and not promise more than
 * we can cache, those additional bytes must be the minimum possible: 24
 * bytes of rpc header (xid through accept state, with AUTH_NULL
 * verifier), 12 for the compound header (with zero-length tag), and 44
 * for the SEQUENCE op response:
 */
#define NFSD_MIN_HDR_SEQ_SZ  (24 + 12 + 44)

1214 1215 1216 1217 1218 1219 1220 1221 1222
static void
free_session_slots(struct nfsd4_session *ses)
{
	int i;

	for (i = 0; i < ses->se_fchannel.maxreqs; i++)
		kfree(ses->se_slots[i]);
}

1223
/*
1224 1225 1226
 * We don't actually need to cache the rpc and session headers, so we
 * can allocate a little less for each slot:
 */
1227
static inline u32 slot_bytes(struct nfsd4_channel_attrs *ca)
1228
{
1229
	u32 size;
1230

1231 1232 1233 1234 1235
	if (ca->maxresp_cached < NFSD_MIN_HDR_SEQ_SZ)
		size = 0;
	else
		size = ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ;
	return size + sizeof(struct nfsd4_slot);
1236
}
A
Andy Adamson 已提交
1237

1238 1239
/*
 * XXX: If we run out of reserved DRC memory we could (up to a point)
1240
 * re-negotiate active sessions and reduce their slot usage to make
1241
 * room for new connections. For now we just fail the create session.
A
Andy Adamson 已提交
1242
 */
1243
static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca)
A
Andy Adamson 已提交
1244
{
1245 1246
	u32 slotsize = slot_bytes(ca);
	u32 num = ca->maxreqs;
1247
	int avail;
A
Andy Adamson 已提交
1248

1249
	spin_lock(&nfsd_drc_lock);
1250 1251
	avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION,
		    nfsd_drc_max_mem - nfsd_drc_mem_used);
1252 1253 1254
	num = min_t(int, num, avail / slotsize);
	nfsd_drc_mem_used += num * slotsize;
	spin_unlock(&nfsd_drc_lock);
A
Andy Adamson 已提交
1255

1256 1257
	return num;
}
A
Andy Adamson 已提交
1258

1259
static void nfsd4_put_drc_mem(struct nfsd4_channel_attrs *ca)
1260
{
1261 1262
	int slotsize = slot_bytes(ca);

1263
	spin_lock(&nfsd_drc_lock);
1264
	nfsd_drc_mem_used -= slotsize * ca->maxreqs;
1265
	spin_unlock(&nfsd_drc_lock);
1266
}
A
Andy Adamson 已提交
1267

1268 1269
static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fattrs,
					   struct nfsd4_channel_attrs *battrs)
1270
{
1271 1272
	int numslots = fattrs->maxreqs;
	int slotsize = slot_bytes(fattrs);
1273 1274
	struct nfsd4_session *new;
	int mem, i;
1275

1276 1277 1278
	BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot *)
			+ sizeof(struct nfsd4_session) > PAGE_SIZE);
	mem = numslots * sizeof(struct nfsd4_slot *);
A
Andy Adamson 已提交
1279

1280 1281 1282
	new = kzalloc(sizeof(*new) + mem, GFP_KERNEL);
	if (!new)
		return NULL;
1283
	/* allocate each struct nfsd4_slot and data cache in one piece */
1284
	for (i = 0; i < numslots; i++) {
1285
		new->se_slots[i] = kzalloc(slotsize, GFP_KERNEL);
1286
		if (!new->se_slots[i])
1287 1288
			goto out_free;
	}
1289 1290 1291 1292

	memcpy(&new->se_fchannel, fattrs, sizeof(struct nfsd4_channel_attrs));
	memcpy(&new->se_bchannel, battrs, sizeof(struct nfsd4_channel_attrs));

1293 1294 1295 1296 1297 1298
	return new;
out_free:
	while (i--)
		kfree(new->se_slots[i]);
	kfree(new);
	return NULL;
A
Andy Adamson 已提交
1299 1300
}

1301 1302 1303 1304 1305
static void free_conn(struct nfsd4_conn *c)
{
	svc_xprt_put(c->cn_xprt);
	kfree(c);
}
A
Andy Adamson 已提交
1306

1307 1308 1309 1310
static void nfsd4_conn_lost(struct svc_xpt_user *u)
{
	struct nfsd4_conn *c = container_of(u, struct nfsd4_conn, cn_xpt_user);
	struct nfs4_client *clp = c->cn_session->se_client;
A
Andy Adamson 已提交
1311

1312 1313 1314 1315 1316
	spin_lock(&clp->cl_lock);
	if (!list_empty(&c->cn_persession)) {
		list_del(&c->cn_persession);
		free_conn(c);
	}
1317
	nfsd4_probe_callback(clp);
1318
	spin_unlock(&clp->cl_lock);
1319
}
A
Andy Adamson 已提交
1320

1321
static struct nfsd4_conn *alloc_conn(struct svc_rqst *rqstp, u32 flags)
1322 1323
{
	struct nfsd4_conn *conn;
A
Andy Adamson 已提交
1324

1325 1326
	conn = kmalloc(sizeof(struct nfsd4_conn), GFP_KERNEL);
	if (!conn)
1327
		return NULL;
1328 1329
	svc_xprt_get(rqstp->rq_xprt);
	conn->cn_xprt = rqstp->rq_xprt;
1330
	conn->cn_flags = flags;
1331 1332 1333
	INIT_LIST_HEAD(&conn->cn_xpt_user.list);
	return conn;
}
1334

1335 1336 1337 1338
static void __nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses)
{
	conn->cn_session = ses;
	list_add(&conn->cn_persession, &ses->se_conns);
A
Andy Adamson 已提交
1339 1340
}

1341
static void nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses)
1342
{
1343
	struct nfs4_client *clp = ses->se_client;
1344

1345
	spin_lock(&clp->cl_lock);
1346
	__nfsd4_hash_conn(conn, ses);
1347
	spin_unlock(&clp->cl_lock);
1348 1349
}

1350
static int nfsd4_register_conn(struct nfsd4_conn *conn)
1351
{
1352
	conn->cn_xpt_user.callback = nfsd4_conn_lost;
1353
	return register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user);
1354 1355
}

1356
static void nfsd4_init_conn(struct svc_rqst *rqstp, struct nfsd4_conn *conn, struct nfsd4_session *ses)
A
Andy Adamson 已提交
1357
{
1358
	int ret;
A
Andy Adamson 已提交
1359

1360
	nfsd4_hash_conn(conn, ses);
1361 1362 1363 1364
	ret = nfsd4_register_conn(conn);
	if (ret)
		/* oops; xprt is already down: */
		nfsd4_conn_lost(&conn->cn_xpt_user);
1365 1366
	/* We may have gained or lost a callback channel: */
	nfsd4_probe_callback_sync(ses->se_client);
1367
}
A
Andy Adamson 已提交
1368

1369
static struct nfsd4_conn *alloc_conn_from_crses(struct svc_rqst *rqstp, struct nfsd4_create_session *cses)
1370 1371 1372
{
	u32 dir = NFS4_CDFC4_FORE;

1373
	if (cses->flags & SESSION4_BACK_CHAN)
1374
		dir |= NFS4_CDFC4_BACK;
1375
	return alloc_conn(rqstp, dir);
1376 1377 1378
}

/* must be called under client_lock */
1379
static void nfsd4_del_conns(struct nfsd4_session *s)
1380
{
1381 1382
	struct nfs4_client *clp = s->se_client;
	struct nfsd4_conn *c;
A
Andy Adamson 已提交
1383

1384 1385 1386 1387 1388
	spin_lock(&clp->cl_lock);
	while (!list_empty(&s->se_conns)) {
		c = list_first_entry(&s->se_conns, struct nfsd4_conn, cn_persession);
		list_del_init(&c->cn_persession);
		spin_unlock(&clp->cl_lock);
1389

1390 1391
		unregister_xpt_user(c->cn_xprt, &c->cn_xpt_user);
		free_conn(c);
A
Andy Adamson 已提交
1392

1393 1394 1395
		spin_lock(&clp->cl_lock);
	}
	spin_unlock(&clp->cl_lock);
1396
}
A
Andy Adamson 已提交
1397

1398 1399 1400 1401 1402 1403
static void __free_session(struct nfsd4_session *ses)
{
	free_session_slots(ses);
	kfree(ses);
}

1404
static void free_session(struct nfsd4_session *ses)
1405
{
1406
	nfsd4_del_conns(ses);
1407
	nfsd4_put_drc_mem(&ses->se_fchannel);
1408
	__free_session(ses);
1409 1410
}

1411
static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, struct nfs4_client *clp, struct nfsd4_create_session *cses)
1412 1413
{
	int idx;
1414
	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
1415

A
Andy Adamson 已提交
1416 1417 1418
	new->se_client = clp;
	gen_sessionid(new);

1419 1420
	INIT_LIST_HEAD(&new->se_conns);

1421
	new->se_cb_seq_nr = 1;
A
Andy Adamson 已提交
1422
	new->se_flags = cses->flags;
1423
	new->se_cb_prog = cses->callback_prog;
1424
	new->se_cb_sec = cses->cb_sec;
1425
	atomic_set(&new->se_ref, 0);
1426
	idx = hash_sessionid(&new->se_sessionid);
1427
	list_add(&new->se_hash, &nn->sessionid_hashtbl[idx]);
1428
	spin_lock(&clp->cl_lock);
A
Andy Adamson 已提交
1429
	list_add(&new->se_perclnt, &clp->cl_sessions);
1430
	spin_unlock(&clp->cl_lock);
1431

1432
	if (cses->flags & SESSION4_BACK_CHAN) {
1433
		struct sockaddr *sa = svc_addr(rqstp);
1434 1435 1436 1437 1438 1439 1440
		/*
		 * This is a little silly; with sessions there's no real
		 * use for the callback address.  Use the peer address
		 * as a reasonable default for now, but consider fixing
		 * the rpc client not to require an address in the
		 * future:
		 */
1441 1442 1443
		rpc_copy_addr((struct sockaddr *)&clp->cl_cb_conn.cb_addr, sa);
		clp->cl_cb_conn.cb_addrlen = svc_addr_len(sa);
	}
A
Andy Adamson 已提交
1444 1445
}

1446
/* caller must hold client_lock */
M
Marc Eshel 已提交
1447
static struct nfsd4_session *
1448
__find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net)
M
Marc Eshel 已提交
1449 1450 1451
{
	struct nfsd4_session *elem;
	int idx;
1452
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
M
Marc Eshel 已提交
1453

1454 1455
	lockdep_assert_held(&nn->client_lock);

M
Marc Eshel 已提交
1456 1457 1458
	dump_sessionid(__func__, sessionid);
	idx = hash_sessionid(sessionid);
	/* Search in the appropriate list */
1459
	list_for_each_entry(elem, &nn->sessionid_hashtbl[idx], se_hash) {
M
Marc Eshel 已提交
1460 1461 1462 1463 1464 1465 1466 1467 1468 1469
		if (!memcmp(elem->se_sessionid.data, sessionid->data,
			    NFS4_MAX_SESSIONID_LEN)) {
			return elem;
		}
	}

	dprintk("%s: session not found\n", __func__);
	return NULL;
}

1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487
static struct nfsd4_session *
find_in_sessionid_hashtbl(struct nfs4_sessionid *sessionid, struct net *net,
		__be32 *ret)
{
	struct nfsd4_session *session;
	__be32 status = nfserr_badsession;

	session = __find_in_sessionid_hashtbl(sessionid, net);
	if (!session)
		goto out;
	status = nfsd4_get_session_locked(session);
	if (status)
		session = NULL;
out:
	*ret = status;
	return session;
}

1488
/* caller must hold client_lock */
A
Andy Adamson 已提交
1489
static void
M
Marc Eshel 已提交
1490
unhash_session(struct nfsd4_session *ses)
A
Andy Adamson 已提交
1491
{
1492 1493 1494 1495 1496
	struct nfs4_client *clp = ses->se_client;
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);

	lockdep_assert_held(&nn->client_lock);

A
Andy Adamson 已提交
1497
	list_del(&ses->se_hash);
1498
	spin_lock(&ses->se_client->cl_lock);
A
Andy Adamson 已提交
1499
	list_del(&ses->se_perclnt);
1500
	spin_unlock(&ses->se_client->cl_lock);
M
Marc Eshel 已提交
1501 1502
}

L
Linus Torvalds 已提交
1503 1504
/* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */
static int
1505
STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn)
L
Linus Torvalds 已提交
1506
{
1507
	if (clid->cl_boot == nn->boot_time)
L
Linus Torvalds 已提交
1508
		return 0;
A
Andy Adamson 已提交
1509
	dprintk("NFSD stale clientid (%08x/%08x) boot_time %08lx\n",
1510
		clid->cl_boot, clid->cl_id, nn->boot_time);
L
Linus Torvalds 已提交
1511 1512 1513 1514 1515 1516 1517 1518
	return 1;
}

/* 
 * XXX Should we use a slab cache ?
 * This type of memory management is somewhat inefficient, but we use it
 * anyway since SETCLIENTID is not a common operation.
 */
1519
static struct nfs4_client *alloc_client(struct xdr_netobj name)
L
Linus Torvalds 已提交
1520 1521
{
	struct nfs4_client *clp;
1522
	int i;
L
Linus Torvalds 已提交
1523

1524 1525 1526
	clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL);
	if (clp == NULL)
		return NULL;
1527
	clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL);
1528 1529 1530 1531 1532 1533 1534 1535
	if (clp->cl_name.data == NULL)
		goto err_no_name;
	clp->cl_ownerstr_hashtbl = kmalloc(sizeof(struct list_head) *
			OWNER_HASH_SIZE, GFP_KERNEL);
	if (!clp->cl_ownerstr_hashtbl)
		goto err_no_hashtbl;
	for (i = 0; i < OWNER_HASH_SIZE; i++)
		INIT_LIST_HEAD(&clp->cl_ownerstr_hashtbl[i]);
1536
	clp->cl_name.len = name.len;
1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548
	INIT_LIST_HEAD(&clp->cl_sessions);
	idr_init(&clp->cl_stateids);
	atomic_set(&clp->cl_refcount, 0);
	clp->cl_cb_state = NFSD4_CB_UNKNOWN;
	INIT_LIST_HEAD(&clp->cl_idhash);
	INIT_LIST_HEAD(&clp->cl_openowners);
	INIT_LIST_HEAD(&clp->cl_delegations);
	INIT_LIST_HEAD(&clp->cl_lru);
	INIT_LIST_HEAD(&clp->cl_callbacks);
	INIT_LIST_HEAD(&clp->cl_revoked);
	spin_lock_init(&clp->cl_lock);
	rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
L
Linus Torvalds 已提交
1549
	return clp;
1550 1551 1552 1553 1554
err_no_hashtbl:
	kfree(clp->cl_name.data);
err_no_name:
	kfree(clp);
	return NULL;
L
Linus Torvalds 已提交
1555 1556
}

1557
static void
L
Linus Torvalds 已提交
1558 1559
free_client(struct nfs4_client *clp)
{
1560 1561 1562 1563 1564
	while (!list_empty(&clp->cl_sessions)) {
		struct nfsd4_session *ses;
		ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
				se_perclnt);
		list_del(&ses->se_perclnt);
1565 1566
		WARN_ON_ONCE(atomic_read(&ses->se_ref));
		free_session(ses);
1567
	}
1568
	rpc_destroy_wait_queue(&clp->cl_cb_waitq);
1569
	free_svc_cred(&clp->cl_cred);
1570
	kfree(clp->cl_ownerstr_hashtbl);
L
Linus Torvalds 已提交
1571
	kfree(clp->cl_name.data);
M
majianpeng 已提交
1572
	idr_destroy(&clp->cl_stateids);
L
Linus Torvalds 已提交
1573 1574 1575
	kfree(clp);
}

B
Benny Halevy 已提交
1576
/* must be called under the client_lock */
1577
static void
B
Benny Halevy 已提交
1578 1579
unhash_client_locked(struct nfs4_client *clp)
{
1580
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
1581 1582
	struct nfsd4_session *ses;

1583 1584
	lockdep_assert_held(&nn->client_lock);

1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595
	/* Mark the client as expired! */
	clp->cl_time = 0;
	/* Make it invisible */
	if (!list_empty(&clp->cl_idhash)) {
		list_del_init(&clp->cl_idhash);
		if (test_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags))
			rb_erase(&clp->cl_namenode, &nn->conf_name_tree);
		else
			rb_erase(&clp->cl_namenode, &nn->unconf_name_tree);
	}
	list_del_init(&clp->cl_lru);
1596
	spin_lock(&clp->cl_lock);
1597 1598
	list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
		list_del_init(&ses->se_hash);
1599
	spin_unlock(&clp->cl_lock);
B
Benny Halevy 已提交
1600 1601
}

L
Linus Torvalds 已提交
1602
static void
1603 1604 1605 1606 1607 1608 1609 1610 1611
unhash_client(struct nfs4_client *clp)
{
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);

	spin_lock(&nn->client_lock);
	unhash_client_locked(clp);
	spin_unlock(&nn->client_lock);
}

1612 1613 1614 1615 1616 1617 1618 1619
static __be32 mark_client_expired_locked(struct nfs4_client *clp)
{
	if (atomic_read(&clp->cl_refcount))
		return nfserr_jukebox;
	unhash_client_locked(clp);
	return nfs_ok;
}

1620 1621
static void
__destroy_client(struct nfs4_client *clp)
L
Linus Torvalds 已提交
1622
{
1623
	struct nfs4_openowner *oo;
L
Linus Torvalds 已提交
1624 1625 1626 1627
	struct nfs4_delegation *dp;
	struct list_head reaplist;

	INIT_LIST_HEAD(&reaplist);
1628
	spin_lock(&state_lock);
1629 1630
	while (!list_empty(&clp->cl_delegations)) {
		dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
1631 1632
		unhash_delegation_locked(dp);
		list_add(&dp->dl_recall_lru, &reaplist);
L
Linus Torvalds 已提交
1633
	}
1634
	spin_unlock(&state_lock);
L
Linus Torvalds 已提交
1635 1636
	while (!list_empty(&reaplist)) {
		dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
1637
		list_del_init(&dp->dl_recall_lru);
1638
		nfs4_put_stid(&dp->dl_stid);
L
Linus Torvalds 已提交
1639
	}
1640
	while (!list_empty(&clp->cl_revoked)) {
1641
		dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
1642
		list_del_init(&dp->dl_recall_lru);
1643
		nfs4_put_stid(&dp->dl_stid);
1644
	}
1645
	while (!list_empty(&clp->cl_openowners)) {
1646
		oo = list_entry(clp->cl_openowners.next, struct nfs4_openowner, oo_perclient);
1647
		atomic_inc(&oo->oo_owner.so_count);
1648
		release_openowner(oo);
L
Linus Torvalds 已提交
1649
	}
1650
	nfsd4_shutdown_callback(clp);
B
Benny Halevy 已提交
1651 1652
	if (clp->cl_cb_conn.cb_xprt)
		svc_xprt_put(clp->cl_cb_conn.cb_xprt);
1653
	free_client(clp);
L
Linus Torvalds 已提交
1654 1655
}

/* Unhash and destroy a client without touching the stable-storage record. */
static void
destroy_client(struct nfs4_client *clp)
{
	unhash_client(clp);
	__destroy_client(clp);
}

/* Expire a client: also remove its stable-storage recovery record. */
static void expire_client(struct nfs4_client *clp)
{
	unhash_client(clp);
	nfsd4_client_record_remove(clp);
	__destroy_client(clp);
}

1670 1671 1672 1673
static void copy_verf(struct nfs4_client *target, nfs4_verifier *source)
{
	memcpy(target->cl_verifier.data, source->data,
			sizeof(target->cl_verifier.data));
L
Linus Torvalds 已提交
1674 1675
}

1676 1677
static void copy_clid(struct nfs4_client *target, struct nfs4_client *source)
{
L
Linus Torvalds 已提交
1678 1679 1680 1681
	target->cl_clientid.cl_boot = source->cl_clientid.cl_boot; 
	target->cl_clientid.cl_id = source->cl_clientid.cl_id; 
}

1682
static int copy_cred(struct svc_cred *target, struct svc_cred *source)
1683
{
1684 1685 1686 1687 1688 1689 1690
	if (source->cr_principal) {
		target->cr_principal =
				kstrdup(source->cr_principal, GFP_KERNEL);
		if (target->cr_principal == NULL)
			return -ENOMEM;
	} else
		target->cr_principal = NULL;
1691
	target->cr_flavor = source->cr_flavor;
L
Linus Torvalds 已提交
1692 1693 1694 1695
	target->cr_uid = source->cr_uid;
	target->cr_gid = source->cr_gid;
	target->cr_group_info = source->cr_group_info;
	get_group_info(target->cr_group_info);
1696 1697 1698
	target->cr_gss_mech = source->cr_gss_mech;
	if (source->cr_gss_mech)
		gss_mech_get(source->cr_gss_mech);
1699
	return 0;
L
Linus Torvalds 已提交
1700 1701
}

1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712
static long long
compare_blob(const struct xdr_netobj *o1, const struct xdr_netobj *o2)
{
	long long res;

	res = o1->len - o2->len;
	if (res)
		return res;
	return (long long)memcmp(o1->data, o2->data, o1->len);
}

1713
static int same_name(const char *n1, const char *n2)
1714
{
N
NeilBrown 已提交
1715
	return 0 == memcmp(n1, n2, HEXDIR_LEN);
L
Linus Torvalds 已提交
1716 1717 1718
}

static int
1719 1720 1721
same_verf(nfs4_verifier *v1, nfs4_verifier *v2)
{
	return 0 == memcmp(v1->data, v2->data, sizeof(v1->data));
L
Linus Torvalds 已提交
1722 1723 1724
}

static int
1725 1726 1727
same_clid(clientid_t *cl1, clientid_t *cl2)
{
	return (cl1->cl_boot == cl2->cl_boot) && (cl1->cl_id == cl2->cl_id);
L
Linus Torvalds 已提交
1728 1729
}

1730 1731 1732 1733 1734 1735 1736
static bool groups_equal(struct group_info *g1, struct group_info *g2)
{
	int i;

	if (g1->ngroups != g2->ngroups)
		return false;
	for (i=0; i<g1->ngroups; i++)
1737
		if (!gid_eq(GROUP_AT(g1, i), GROUP_AT(g2, i)))
1738 1739 1740 1741
			return false;
	return true;
}

1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757
/*
 * RFC 3530 language requires clid_inuse be returned when the
 * "principal" associated with a requests differs from that previously
 * used.  We use uid, gid's, and gss principal string as our best
 * approximation.  We also don't want to allow non-gss use of a client
 * established using gss: in theory cr_principal should catch that
 * change, but in practice cr_principal can be null even in the gss case
 * since gssd doesn't always pass down a principal string.
 */
static bool is_gss_cred(struct svc_cred *cr)
{
	/* Is cr_flavor one of the gss "pseudoflavors"?: */
	return (cr->cr_flavor > RPC_AUTH_MAXFLAVOR);
}


1758
static bool
1759 1760
same_creds(struct svc_cred *cr1, struct svc_cred *cr2)
{
1761
	if ((is_gss_cred(cr1) != is_gss_cred(cr2))
1762 1763
		|| (!uid_eq(cr1->cr_uid, cr2->cr_uid))
		|| (!gid_eq(cr1->cr_gid, cr2->cr_gid))
1764 1765 1766 1767 1768 1769
		|| !groups_equal(cr1->cr_group_info, cr2->cr_group_info))
		return false;
	if (cr1->cr_principal == cr2->cr_principal)
		return true;
	if (!cr1->cr_principal || !cr2->cr_principal)
		return false;
1770
	return 0 == strcmp(cr1->cr_principal, cr2->cr_principal);
L
Linus Torvalds 已提交
1771 1772
}

1773 1774 1775 1776 1777
static bool svc_rqst_integrity_protected(struct svc_rqst *rqstp)
{
	struct svc_cred *cr = &rqstp->rq_cred;
	u32 service;

1778 1779
	if (!cr->cr_gss_mech)
		return false;
1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799
	service = gss_pseudoflavor_to_service(cr->cr_gss_mech, cr->cr_flavor);
	return service == RPC_GSS_SVC_INTEGRITY ||
	       service == RPC_GSS_SVC_PRIVACY;
}

static bool mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp)
{
	struct svc_cred *cr = &rqstp->rq_cred;

	if (!cl->cl_mach_cred)
		return true;
	if (cl->cl_cred.cr_gss_mech != cr->cr_gss_mech)
		return false;
	if (!svc_rqst_integrity_protected(rqstp))
		return false;
	if (!cr->cr_principal)
		return false;
	return 0 == strcmp(cl->cl_cred.cr_principal, cr->cr_principal);
}

1800
static void gen_confirm(struct nfs4_client *clp, struct nfsd_net *nn)
1801
{
1802
	__be32 verf[2];
L
Linus Torvalds 已提交
1803

1804 1805 1806 1807 1808
	/*
	 * This is opaque to client, so no need to byte-swap. Use
	 * __force to keep sparse happy
	 */
	verf[0] = (__force __be32)get_seconds();
1809
	verf[1] = (__force __be32)nn->clientid_counter;
1810
	memcpy(clp->cl_confirm.data, verf, sizeof(clp->cl_confirm.data));
L
Linus Torvalds 已提交
1811 1812
}

1813 1814 1815 1816 1817 1818 1819
static void gen_clid(struct nfs4_client *clp, struct nfsd_net *nn)
{
	clp->cl_clientid.cl_boot = nn->boot_time;
	clp->cl_clientid.cl_id = nn->clientid_counter++;
	gen_confirm(clp, nn);
}

1820 1821
static struct nfs4_stid *
find_stateid_locked(struct nfs4_client *cl, stateid_t *t)
1822
{
J
J. Bruce Fields 已提交
1823 1824 1825 1826 1827 1828
	struct nfs4_stid *ret;

	ret = idr_find(&cl->cl_stateids, t->si_opaque.so_id);
	if (!ret || !ret->sc_type)
		return NULL;
	return ret;
J
J. Bruce Fields 已提交
1829 1830
}

1831 1832
static struct nfs4_stid *
find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask)
1833 1834
{
	struct nfs4_stid *s;
J
J. Bruce Fields 已提交
1835

1836 1837
	spin_lock(&cl->cl_lock);
	s = find_stateid_locked(cl, t);
1838 1839 1840 1841 1842 1843
	if (s != NULL) {
		if (typemask & s->sc_type)
			atomic_inc(&s->sc_count);
		else
			s = NULL;
	}
1844 1845
	spin_unlock(&cl->cl_lock);
	return s;
1846 1847
}

J
Jeff Layton 已提交
1848
static struct nfs4_client *create_client(struct xdr_netobj name,
1849 1850 1851 1852
		struct svc_rqst *rqstp, nfs4_verifier *verf)
{
	struct nfs4_client *clp;
	struct sockaddr *sa = svc_addr(rqstp);
1853
	int ret;
1854
	struct net *net = SVC_NET(rqstp);
1855 1856 1857 1858 1859

	clp = alloc_client(name);
	if (clp == NULL)
		return NULL;

1860 1861 1862 1863
	ret = copy_cred(&clp->cl_cred, &rqstp->rq_cred);
	if (ret) {
		free_client(clp);
		return NULL;
1864
	}
1865
	INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_run_cb_null);
B
Benny Halevy 已提交
1866
	clp->cl_time = get_seconds();
1867 1868 1869
	clear_bit(0, &clp->cl_cb_slot_busy);
	copy_verf(clp, verf);
	rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa);
1870
	clp->cl_cb_session = NULL;
1871
	clp->net = net;
1872 1873 1874
	return clp;
}

/* Insert a client into an rbtree ordered by compare_blob() on cl_name. */
static void
add_clp_to_name_tree(struct nfs4_client *new_clp, struct rb_root *root)
{
	struct rb_node **new = &(root->rb_node), *parent = NULL;
	struct nfs4_client *clp;

	while (*new) {
		clp = rb_entry(*new, struct nfs4_client, cl_namenode);
		parent = *new;

		if (compare_blob(&clp->cl_name, &new_clp->cl_name) > 0)
			new = &((*new)->rb_left);
		else
			new = &((*new)->rb_right);
	}

	rb_link_node(&new_clp->cl_namenode, parent, new);
	rb_insert_color(&new_clp->cl_namenode, root);
}

/* Find a client by name in one of the per-net name rbtrees. */
static struct nfs4_client *
find_clp_in_name_tree(struct xdr_netobj *name, struct rb_root *root)
{
	long long cmp;
	struct rb_node *node = root->rb_node;
	struct nfs4_client *clp;

	while (node) {
		clp = rb_entry(node, struct nfs4_client, cl_namenode);
		cmp = compare_blob(&clp->cl_name, name);
		if (cmp > 0)
			node = node->rb_left;
		else if (cmp < 0)
			node = node->rb_right;
		else
			return clp;
	}
	return NULL;
}

static void
add_to_unconfirmed(struct nfs4_client *clp)
L
Linus Torvalds 已提交
1917 1918
{
	unsigned int idhashval;
1919
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
L
Linus Torvalds 已提交
1920

1921 1922
	lockdep_assert_held(&nn->client_lock);

1923
	clear_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags);
1924
	add_clp_to_name_tree(clp, &nn->unconf_name_tree);
L
Linus Torvalds 已提交
1925
	idhashval = clientid_hashval(clp->cl_clientid.cl_id);
1926
	list_add(&clp->cl_idhash, &nn->unconf_id_hashtbl[idhashval]);
1927
	renew_client_locked(clp);
L
Linus Torvalds 已提交
1928 1929
}

1930
static void
L
Linus Torvalds 已提交
1931 1932 1933
move_to_confirmed(struct nfs4_client *clp)
{
	unsigned int idhashval = clientid_hashval(clp->cl_clientid.cl_id);
1934
	struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
L
Linus Torvalds 已提交
1935

1936 1937
	lockdep_assert_held(&nn->client_lock);

L
Linus Torvalds 已提交
1938
	dprintk("NFSD: move_to_confirm nfs4_client %p\n", clp);
1939
	list_move(&clp->cl_idhash, &nn->conf_id_hashtbl[idhashval]);
1940
	rb_erase(&clp->cl_namenode, &nn->unconf_name_tree);
1941
	add_clp_to_name_tree(clp, &nn->conf_name_tree);
1942
	set_bit(NFSD4_CLIENT_CONFIRMED, &clp->cl_flags);
1943
	renew_client_locked(clp);
L
Linus Torvalds 已提交
1944 1945 1946
}

static struct nfs4_client *
J
J. Bruce Fields 已提交
1947
find_client_in_id_table(struct list_head *tbl, clientid_t *clid, bool sessions)
L
Linus Torvalds 已提交
1948 1949 1950 1951
{
	struct nfs4_client *clp;
	unsigned int idhashval = clientid_hashval(clid->cl_id);

J
J. Bruce Fields 已提交
1952
	list_for_each_entry(clp, &tbl[idhashval], cl_idhash) {
1953
		if (same_clid(&clp->cl_clientid, clid)) {
1954 1955
			if ((bool)clp->cl_minorversion != sessions)
				return NULL;
1956
			renew_client_locked(clp);
L
Linus Torvalds 已提交
1957
			return clp;
1958
		}
L
Linus Torvalds 已提交
1959 1960 1961 1962
	}
	return NULL;
}

J
J. Bruce Fields 已提交
1963 1964 1965 1966 1967
static struct nfs4_client *
find_confirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn)
{
	struct list_head *tbl = nn->conf_id_hashtbl;

1968
	lockdep_assert_held(&nn->client_lock);
J
J. Bruce Fields 已提交
1969 1970 1971
	return find_client_in_id_table(tbl, clid, sessions);
}

L
Linus Torvalds 已提交
1972
static struct nfs4_client *
1973
find_unconfirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn)
L
Linus Torvalds 已提交
1974
{
J
J. Bruce Fields 已提交
1975
	struct list_head *tbl = nn->unconf_id_hashtbl;
L
Linus Torvalds 已提交
1976

1977
	lockdep_assert_held(&nn->client_lock);
J
J. Bruce Fields 已提交
1978
	return find_client_in_id_table(tbl, clid, sessions);
L
Linus Torvalds 已提交
1979 1980
}

1981
static bool clp_used_exchangeid(struct nfs4_client *clp)
1982
{
1983
	return clp->cl_exchange_flags != 0;
1984
} 
1985

1986
static struct nfs4_client *
1987
find_confirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn)
1988
{
1989
	lockdep_assert_held(&nn->client_lock);
1990
	return find_clp_in_name_tree(name, &nn->conf_name_tree);
1991 1992 1993
}

static struct nfs4_client *
1994
find_unconfirmed_client_by_name(struct xdr_netobj *name, struct nfsd_net *nn)
1995
{
1996
	lockdep_assert_held(&nn->client_lock);
1997
	return find_clp_in_name_tree(name, &nn->unconf_name_tree);
1998 1999
}

2000
static void
2001
gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, struct svc_rqst *rqstp)
L
Linus Torvalds 已提交
2002
{
2003
	struct nfs4_cb_conn *conn = &clp->cl_cb_conn;
2004 2005
	struct sockaddr	*sa = svc_addr(rqstp);
	u32 scopeid = rpc_get_scope_id(sa);
2006 2007 2008 2009 2010 2011 2012 2013 2014 2015
	unsigned short expected_family;

	/* Currently, we only support tcp and tcp6 for the callback channel */
	if (se->se_callback_netid_len == 3 &&
	    !memcmp(se->se_callback_netid_val, "tcp", 3))
		expected_family = AF_INET;
	else if (se->se_callback_netid_len == 4 &&
		 !memcmp(se->se_callback_netid_val, "tcp6", 4))
		expected_family = AF_INET6;
	else
L
Linus Torvalds 已提交
2016 2017
		goto out_err;

2018
	conn->cb_addrlen = rpc_uaddr2sockaddr(clp->net, se->se_callback_addr_val,
2019
					    se->se_callback_addr_len,
2020 2021
					    (struct sockaddr *)&conn->cb_addr,
					    sizeof(conn->cb_addr));
2022

2023
	if (!conn->cb_addrlen || conn->cb_addr.ss_family != expected_family)
L
Linus Torvalds 已提交
2024
		goto out_err;
2025

2026 2027
	if (conn->cb_addr.ss_family == AF_INET6)
		((struct sockaddr_in6 *)&conn->cb_addr)->sin6_scope_id = scopeid;
2028

2029 2030
	conn->cb_prog = se->se_callback_prog;
	conn->cb_ident = se->se_callback_ident;
2031
	memcpy(&conn->cb_saddr, &rqstp->rq_daddr, rqstp->rq_daddrlen);
L
Linus Torvalds 已提交
2032 2033
	return;
out_err:
2034 2035
	conn->cb_addr.ss_family = AF_UNSPEC;
	conn->cb_addrlen = 0;
N
Neil Brown 已提交
2036
	dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) "
L
Linus Torvalds 已提交
2037 2038 2039 2040 2041 2042
		"will not receive delegations\n",
		clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id);

	return;
}

2043
/*
2044
 * Cache a reply. nfsd4_check_resp_size() has bounded the cache size.
2045
 */
2046
static void
2047 2048
nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
{
2049
	struct xdr_buf *buf = resp->xdr.buf;
2050 2051
	struct nfsd4_slot *slot = resp->cstate.slot;
	unsigned int base;
2052

2053
	dprintk("--> %s slot %p\n", __func__, slot);
2054

2055 2056
	slot->sl_opcnt = resp->opcnt;
	slot->sl_status = resp->cstate.status;
2057

2058
	slot->sl_flags |= NFSD4_SLOT_INITIALIZED;
2059
	if (nfsd4_not_cached(resp)) {
2060
		slot->sl_datalen = 0;
2061
		return;
2062
	}
2063 2064 2065
	base = resp->cstate.data_offset;
	slot->sl_datalen = buf->len - base;
	if (read_bytes_from_xdr_buf(buf, base, slot->sl_data, slot->sl_datalen))
2066 2067
		WARN("%s: sessions DRC could not cache compound\n", __func__);
	return;
2068 2069 2070
}

/*
2071 2072 2073 2074
 * Encode the replay sequence operation from the slot values.
 * If cachethis is FALSE encode the uncached rep error on the next
 * operation which sets resp->p and increments resp->opcnt for
 * nfs4svc_encode_compoundres.
2075 2076
 *
 */
2077 2078 2079
static __be32
nfsd4_enc_sequence_replay(struct nfsd4_compoundargs *args,
			  struct nfsd4_compoundres *resp)
2080
{
2081 2082
	struct nfsd4_op *op;
	struct nfsd4_slot *slot = resp->cstate.slot;
2083

2084 2085 2086
	/* Encode the replayed sequence operation */
	op = &args->ops[resp->opcnt - 1];
	nfsd4_encode_operation(resp, op);
2087

2088
	/* Return nfserr_retry_uncached_rep in next operation. */
2089
	if (args->opcnt > 1 && !(slot->sl_flags & NFSD4_SLOT_CACHETHIS)) {
2090 2091 2092
		op = &args->ops[resp->opcnt++];
		op->status = nfserr_retry_uncached_rep;
		nfsd4_encode_operation(resp, op);
2093
	}
2094
	return op->status;
2095 2096 2097
}

/*
2098 2099
 * The sequence operation is not cached because we can use the slot and
 * session values.
2100
 */
2101
static __be32
2102 2103
nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
			 struct nfsd4_sequence *seq)
2104
{
2105
	struct nfsd4_slot *slot = resp->cstate.slot;
2106 2107
	struct xdr_stream *xdr = &resp->xdr;
	__be32 *p;
2108 2109
	__be32 status;

2110
	dprintk("--> %s slot %p\n", __func__, slot);
2111

2112
	status = nfsd4_enc_sequence_replay(resp->rqstp->rq_argp, resp);
2113
	if (status)
2114
		return status;
2115

2116 2117 2118 2119 2120 2121 2122
	p = xdr_reserve_space(xdr, slot->sl_datalen);
	if (!p) {
		WARN_ON_ONCE(1);
		return nfserr_serverfault;
	}
	xdr_encode_opaque_fixed(p, slot->sl_data, slot->sl_datalen);
	xdr_commit_encode(xdr);
2123

2124
	resp->opcnt = slot->sl_opcnt;
2125
	return slot->sl_status;
2126 2127
}

A
Andy Adamson 已提交
2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143
/*
 * Set the exchange_id flags returned by the server.
 */
static void
nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid)
{
	/* pNFS is not supported */
	new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS;

	/* Referrals are supported, Migration is not. */
	new->cl_exchange_flags |= EXCHGID4_FLAG_SUPP_MOVED_REFER;

	/* set the wire flags to return to client. */
	clid->flags = new->cl_exchange_flags;
}

2144 2145 2146 2147 2148 2149 2150 2151
static bool client_has_state(struct nfs4_client *clp)
{
	/*
	 * Note clp->cl_openowners check isn't quite right: there's no
	 * need to count owners without stateid's.
	 *
	 * Also note we should probably be using this in 4.0 case too.
	 */
2152 2153 2154
	return !list_empty(&clp->cl_openowners)
		|| !list_empty(&clp->cl_delegations)
		|| !list_empty(&clp->cl_sessions);
2155 2156
}

A
Andy Adamson 已提交
2157 2158 2159 2160 2161
__be32
nfsd4_exchange_id(struct svc_rqst *rqstp,
		  struct nfsd4_compound_state *cstate,
		  struct nfsd4_exchange_id *exid)
{
2162 2163
	struct nfs4_client *conf, *new;
	struct nfs4_client *unconf = NULL;
J
J. Bruce Fields 已提交
2164
	__be32 status;
2165
	char			addr_str[INET6_ADDRSTRLEN];
A
Andy Adamson 已提交
2166
	nfs4_verifier		verf = exid->verifier;
2167
	struct sockaddr		*sa = svc_addr(rqstp);
2168
	bool	update = exid->flags & EXCHGID4_FLAG_UPD_CONFIRMED_REC_A;
2169
	struct nfsd_net		*nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
A
Andy Adamson 已提交
2170

2171
	rpc_ntop(sa, addr_str, sizeof(addr_str));
A
Andy Adamson 已提交
2172
	dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p "
2173
		"ip_addr=%s flags %x, spa_how %d\n",
A
Andy Adamson 已提交
2174
		__func__, rqstp, exid, exid->clname.len, exid->clname.data,
2175
		addr_str, exid->flags, exid->spa_how);
A
Andy Adamson 已提交
2176

2177
	if (exid->flags & ~EXCHGID4_FLAG_MASK_A)
A
Andy Adamson 已提交
2178 2179 2180
		return nfserr_inval;

	switch (exid->spa_how) {
2181 2182 2183
	case SP4_MACH_CRED:
		if (!svc_rqst_integrity_protected(rqstp))
			return nfserr_inval;
A
Andy Adamson 已提交
2184 2185
	case SP4_NONE:
		break;
2186 2187
	default:				/* checked by xdr code */
		WARN_ON_ONCE(1);
A
Andy Adamson 已提交
2188
	case SP4_SSV:
2189
		return nfserr_encr_alg_unsupp;
A
Andy Adamson 已提交
2190 2191
	}

2192 2193 2194 2195
	new = create_client(exid->clname, rqstp, &verf);
	if (new == NULL)
		return nfserr_jukebox;

2196
	/* Cases below refer to rfc 5661 section 18.35.4: */
2197
	spin_lock(&nn->client_lock);
2198
	conf = find_confirmed_client_by_name(&exid->clname, nn);
A
Andy Adamson 已提交
2199
	if (conf) {
2200 2201 2202
		bool creds_match = same_creds(&conf->cl_cred, &rqstp->rq_cred);
		bool verfs_match = same_verf(&verf, &conf->cl_verifier);

2203 2204
		if (update) {
			if (!clp_used_exchangeid(conf)) { /* buggy client */
2205
				status = nfserr_inval;
2206 2207
				goto out;
			}
2208 2209 2210 2211
			if (!mach_creds_match(conf, rqstp)) {
				status = nfserr_wrong_cred;
				goto out;
			}
2212
			if (!creds_match) { /* case 9 */
2213
				status = nfserr_perm;
2214 2215 2216
				goto out;
			}
			if (!verfs_match) { /* case 8 */
A
Andy Adamson 已提交
2217 2218 2219
				status = nfserr_not_same;
				goto out;
			}
2220 2221 2222
			/* case 6 */
			exid->flags |= EXCHGID4_FLAG_CONFIRMED_R;
			goto out_copy;
A
Andy Adamson 已提交
2223
		}
2224
		if (!creds_match) { /* case 3 */
2225 2226
			if (client_has_state(conf)) {
				status = nfserr_clid_inuse;
A
Andy Adamson 已提交
2227 2228 2229 2230
				goto out;
			}
			goto out_new;
		}
2231
		if (verfs_match) { /* case 2 */
2232
			conf->cl_exchange_flags |= EXCHGID4_FLAG_CONFIRMED_R;
2233 2234 2235
			goto out_copy;
		}
		/* case 5, client reboot */
2236
		conf = NULL;
2237
		goto out_new;
2238 2239
	}

2240
	if (update) { /* case 7 */
2241 2242
		status = nfserr_noent;
		goto out;
A
Andy Adamson 已提交
2243 2244
	}

2245
	unconf  = find_unconfirmed_client_by_name(&exid->clname, nn);
2246
	if (unconf) /* case 4, possible retry or client restart */
2247
		unhash_client_locked(unconf);
A
Andy Adamson 已提交
2248

2249
	/* case 1 (normal case) */
A
Andy Adamson 已提交
2250
out_new:
2251 2252 2253 2254 2255
	if (conf) {
		status = mark_client_expired_locked(conf);
		if (status)
			goto out;
	}
2256
	new->cl_minorversion = cstate->minorversion;
2257
	new->cl_mach_cred = (exid->spa_how == SP4_MACH_CRED);
A
Andy Adamson 已提交
2258

2259
	gen_clid(new, nn);
2260
	add_to_unconfirmed(new);
2261
	swap(new, conf);
A
Andy Adamson 已提交
2262
out_copy:
2263 2264
	exid->clientid.cl_boot = conf->cl_clientid.cl_boot;
	exid->clientid.cl_id = conf->cl_clientid.cl_id;
A
Andy Adamson 已提交
2265

2266 2267
	exid->seqid = conf->cl_cs_slot.sl_seqid + 1;
	nfsd4_set_ex_flags(conf, exid);
A
Andy Adamson 已提交
2268 2269

	dprintk("nfsd4_exchange_id seqid %d flags %x\n",
2270
		conf->cl_cs_slot.sl_seqid, conf->cl_exchange_flags);
A
Andy Adamson 已提交
2271 2272 2273
	status = nfs_ok;

out:
2274
	spin_unlock(&nn->client_lock);
2275
	if (new)
2276 2277 2278
		expire_client(new);
	if (unconf)
		expire_client(unconf);
A
Andy Adamson 已提交
2279
	return status;
A
Andy Adamson 已提交
2280 2281
}

J
J. Bruce Fields 已提交
2282
static __be32
2283
check_slot_seqid(u32 seqid, u32 slot_seqid, int slot_inuse)
B
Benny Halevy 已提交
2284
{
2285 2286
	dprintk("%s enter. seqid %d slot_seqid %d\n", __func__, seqid,
		slot_seqid);
B
Benny Halevy 已提交
2287 2288

	/* The slot is in use, and no response has been sent. */
2289 2290
	if (slot_inuse) {
		if (seqid == slot_seqid)
B
Benny Halevy 已提交
2291 2292 2293 2294
			return nfserr_jukebox;
		else
			return nfserr_seq_misordered;
	}
2295
	/* Note unsigned 32-bit arithmetic handles wraparound: */
2296
	if (likely(seqid == slot_seqid + 1))
B
Benny Halevy 已提交
2297
		return nfs_ok;
2298
	if (seqid == slot_seqid)
B
Benny Halevy 已提交
2299 2300 2301 2302
		return nfserr_replay_cache;
	return nfserr_seq_misordered;
}

2303 2304 2305 2306 2307 2308 2309
/*
 * Cache the create session result into the create session single DRC
 * slot cache by saving the xdr structure. sl_seqid has been set.
 * Do this for solo or embedded create session operations.
 */
static void
nfsd4_cache_create_session(struct nfsd4_create_session *cr_ses,
			   struct nfsd4_clid_slot *slot, __be32 nfserr)
{
	slot->sl_status = nfserr;
	memcpy(&slot->sl_cr_ses, cr_ses, sizeof(*cr_ses));
}

/* Replay a cached CREATE_SESSION reply from the clid slot. */
static __be32
nfsd4_replay_create_session(struct nfsd4_create_session *cr_ses,
			    struct nfsd4_clid_slot *slot)
{
	memcpy(cr_ses, &slot->sl_cr_ses, sizeof(*cr_ses));
	return slot->sl_status;
}

2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340
#define NFSD_MIN_REQ_HDR_SEQ_SZ	((\
			2 * 2 + /* credential,verifier: AUTH_NULL, length 0 */ \
			1 +	/* MIN tag is length with zero, only length */ \
			3 +	/* version, opcount, opcode */ \
			XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \
				/* seqid, slotID, slotID, cache */ \
			4 ) * sizeof(__be32))

#define NFSD_MIN_RESP_HDR_SEQ_SZ ((\
			2 +	/* verifier: AUTH_NULL, length 0 */\
			1 +	/* status */ \
			1 +	/* MIN tag is length with zero, only length */ \
			3 +	/* opcount, opcode, opstatus*/ \
			XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \
				/* seqid, slotID, slotID, slotID, status */ \
			5 ) * sizeof(__be32))

2341
static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn)
2342
{
2343 2344
	u32 maxrpc = nn->nfsd_serv->sv_max_mesg;

2345 2346 2347 2348
	if (ca->maxreq_sz < NFSD_MIN_REQ_HDR_SEQ_SZ)
		return nfserr_toosmall;
	if (ca->maxresp_sz < NFSD_MIN_RESP_HDR_SEQ_SZ)
		return nfserr_toosmall;
2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367
	ca->headerpadsz = 0;
	ca->maxreq_sz = min_t(u32, ca->maxreq_sz, maxrpc);
	ca->maxresp_sz = min_t(u32, ca->maxresp_sz, maxrpc);
	ca->maxops = min_t(u32, ca->maxops, NFSD_MAX_OPS_PER_COMPOUND);
	ca->maxresp_cached = min_t(u32, ca->maxresp_cached,
			NFSD_SLOT_CACHE_SIZE + NFSD_MIN_HDR_SEQ_SZ);
	ca->maxreqs = min_t(u32, ca->maxreqs, NFSD_MAX_SLOTS_PER_SESSION);
	/*
	 * Note decreasing slot size below client's request may make it
	 * difficult for client to function correctly, whereas
	 * decreasing the number of slots will (just?) affect
	 * performance.  When short on memory we therefore prefer to
	 * decrease number of slots instead of their size.  Clients that
	 * request larger slots than they need will get poor results:
	 */
	ca->maxreqs = nfsd4_get_drc_mem(ca);
	if (!ca->maxreqs)
		return nfserr_jukebox;

2368
	return nfs_ok;
2369 2370
}

2371 2372 2373 2374 2375
#define NFSD_CB_MAX_REQ_SZ	((NFS4_enc_cb_recall_sz + \
				 RPC_MAX_HEADER_WITH_AUTH) * sizeof(__be32))
#define NFSD_CB_MAX_RESP_SZ	((NFS4_dec_cb_recall_sz + \
				 RPC_MAX_REPHEADER_WITH_AUTH) * sizeof(__be32))

2376
static __be32 check_backchannel_attrs(struct nfsd4_channel_attrs *ca)
2377
{
2378 2379 2380 2381 2382 2383 2384 2385
	ca->headerpadsz = 0;

	/*
	 * These RPC_MAX_HEADER macros are overkill, especially since we
	 * don't even do gss on the backchannel yet.  But this is still
	 * less than 1k.  Tighten up this estimate in the unlikely event
	 * it turns out to be a problem for some client:
	 */
2386
	if (ca->maxreq_sz < NFSD_CB_MAX_REQ_SZ)
2387
		return nfserr_toosmall;
2388
	if (ca->maxresp_sz < NFSD_CB_MAX_RESP_SZ)
2389 2390 2391 2392 2393 2394
		return nfserr_toosmall;
	ca->maxresp_cached = 0;
	if (ca->maxops < 2)
		return nfserr_toosmall;

	return nfs_ok;
2395 2396
}

2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414
/* Validate the callback security parms; only AUTH_NULL/AUTH_UNIX work. */
static __be32 nfsd4_check_cb_sec(struct nfsd4_cb_sec *cbs)
{
	switch (cbs->flavor) {
	case RPC_AUTH_NULL:
	case RPC_AUTH_UNIX:
		return nfs_ok;
	default:
		/*
		 * GSS case: the spec doesn't allow us to return this
		 * error.  But it also doesn't allow us not to support
		 * GSS.
		 * I'd rather this fail hard than return some error the
		 * client might think it can already handle:
		 */
		return nfserr_encr_alg_unsupp;
	}
}

A
Andy Adamson 已提交
2415 2416 2417 2418 2419
__be32
nfsd4_create_session(struct svc_rqst *rqstp,
		     struct nfsd4_compound_state *cstate,
		     struct nfsd4_create_session *cr_ses)
{
2420
	struct sockaddr *sa = svc_addr(rqstp);
A
Andy Adamson 已提交
2421
	struct nfs4_client *conf, *unconf;
2422
	struct nfs4_client *old = NULL;
2423
	struct nfsd4_session *new;
2424
	struct nfsd4_conn *conn;
2425
	struct nfsd4_clid_slot *cs_slot = NULL;
J
J. Bruce Fields 已提交
2426
	__be32 status = 0;
2427
	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
A
Andy Adamson 已提交
2428

2429 2430
	if (cr_ses->flags & ~SESSION4_FLAG_MASK_A)
		return nfserr_inval;
2431 2432 2433
	status = nfsd4_check_cb_sec(&cr_ses->cb_sec);
	if (status)
		return status;
2434
	status = check_forechannel_attrs(&cr_ses->fore_channel, nn);
2435 2436 2437
	if (status)
		return status;
	status = check_backchannel_attrs(&cr_ses->back_channel);
2438
	if (status)
2439
		goto out_release_drc_mem;
2440
	status = nfserr_jukebox;
2441
	new = alloc_session(&cr_ses->fore_channel, &cr_ses->back_channel);
2442 2443
	if (!new)
		goto out_release_drc_mem;
2444 2445 2446
	conn = alloc_conn_from_crses(rqstp, cr_ses);
	if (!conn)
		goto out_free_session;
2447

2448
	spin_lock(&nn->client_lock);
2449
	unconf = find_unconfirmed_client(&cr_ses->clientid, true, nn);
2450
	conf = find_confirmed_client(&cr_ses->clientid, true, nn);
2451
	WARN_ON_ONCE(conf && unconf);
A
Andy Adamson 已提交
2452 2453

	if (conf) {
2454 2455 2456
		status = nfserr_wrong_cred;
		if (!mach_creds_match(conf, rqstp))
			goto out_free_conn;
2457 2458
		cs_slot = &conf->cl_cs_slot;
		status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
2459
		if (status == nfserr_replay_cache) {
2460
			status = nfsd4_replay_create_session(cr_ses, cs_slot);
2461
			goto out_free_conn;
2462
		} else if (cr_ses->seqid != cs_slot->sl_seqid + 1) {
A
Andy Adamson 已提交
2463
			status = nfserr_seq_misordered;
2464
			goto out_free_conn;
A
Andy Adamson 已提交
2465 2466 2467
		}
	} else if (unconf) {
		if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) ||
2468
		    !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) {
A
Andy Adamson 已提交
2469
			status = nfserr_clid_inuse;
2470
			goto out_free_conn;
A
Andy Adamson 已提交
2471
		}
2472 2473 2474
		status = nfserr_wrong_cred;
		if (!mach_creds_match(unconf, rqstp))
			goto out_free_conn;
2475 2476
		cs_slot = &unconf->cl_cs_slot;
		status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
2477 2478
		if (status) {
			/* an unconfirmed replay returns misordered */
A
Andy Adamson 已提交
2479
			status = nfserr_seq_misordered;
2480
			goto out_free_conn;
A
Andy Adamson 已提交
2481
		}
2482
		old = find_confirmed_client_by_name(&unconf->cl_name, nn);
2483
		if (old) {
2484
			status = mark_client_expired_locked(old);
2485 2486
			if (status) {
				old = NULL;
2487
				goto out_free_conn;
2488
			}
2489
		}
J
J. Bruce Fields 已提交
2490
		move_to_confirmed(unconf);
A
Andy Adamson 已提交
2491 2492 2493
		conf = unconf;
	} else {
		status = nfserr_stale_clientid;
2494
		goto out_free_conn;
A
Andy Adamson 已提交
2495
	}
2496
	status = nfs_ok;
2497 2498 2499 2500 2501 2502
	/*
	 * We do not support RDMA or persistent sessions
	 */
	cr_ses->flags &= ~SESSION4_PERSIST;
	cr_ses->flags &= ~SESSION4_RDMA;

2503
	init_session(rqstp, new, conf, cr_ses);
2504
	nfsd4_get_session_locked(new);
2505

2506
	memcpy(cr_ses->sessionid.data, new->se_sessionid.data,
A
Andy Adamson 已提交
2507
	       NFS4_MAX_SESSIONID_LEN);
2508
	cs_slot->sl_seqid++;
2509
	cr_ses->seqid = cs_slot->sl_seqid;
A
Andy Adamson 已提交
2510

2511
	/* cache solo and embedded create sessions under the client_lock */
2512
	nfsd4_cache_create_session(cr_ses, cs_slot, status);
2513 2514 2515 2516 2517 2518
	spin_unlock(&nn->client_lock);
	/* init connection and backchannel */
	nfsd4_init_conn(rqstp, conn, new);
	nfsd4_put_session(new);
	if (old)
		expire_client(old);
A
Andy Adamson 已提交
2519
	return status;
2520
out_free_conn:
2521
	spin_unlock(&nn->client_lock);
2522
	free_conn(conn);
2523 2524
	if (old)
		expire_client(old);
2525 2526
out_free_session:
	__free_session(new);
2527 2528
out_release_drc_mem:
	nfsd4_put_drc_mem(&cr_ses->fore_channel);
J
J. Bruce Fields 已提交
2529
	return status;
A
Andy Adamson 已提交
2530 2531
}

2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545
/*
 * Map a BIND_CONN_TO_SESSION direction to what the server will use;
 * "or both" requests are mapped to BOTH.  Returns nfserr_inval for
 * unknown values.
 */
static __be32 nfsd4_map_bcts_dir(u32 *dir)
{
	switch (*dir) {
	case NFS4_CDFC4_FORE:
	case NFS4_CDFC4_BACK:
		return nfs_ok;
	case NFS4_CDFC4_FORE_OR_BOTH:
	case NFS4_CDFC4_BACK_OR_BOTH:
		*dir = NFS4_CDFC4_BOTH;
		return nfs_ok;
	}
	return nfserr_inval;
}

2546 2547 2548
__be32 nfsd4_backchannel_ctl(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_backchannel_ctl *bc)
{
	struct nfsd4_session *session = cstate->session;
2549
	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
2550
	__be32 status;
2551

2552 2553 2554
	status = nfsd4_check_cb_sec(&bc->bc_cb_sec);
	if (status)
		return status;
2555
	spin_lock(&nn->client_lock);
2556 2557
	session->se_cb_prog = bc->bc_cb_program;
	session->se_cb_sec = bc->bc_cb_sec;
2558
	spin_unlock(&nn->client_lock);
2559 2560 2561 2562 2563 2564

	nfsd4_probe_callback(session->se_client);

	return nfs_ok;
}

2565 2566 2567 2568 2569
__be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp,
		     struct nfsd4_compound_state *cstate,
		     struct nfsd4_bind_conn_to_session *bcts)
{
	__be32 status;
2570
	struct nfsd4_conn *conn;
2571
	struct nfsd4_session *session;
2572 2573
	struct net *net = SVC_NET(rqstp);
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
2574 2575 2576

	if (!nfsd4_last_compound_op(rqstp))
		return nfserr_not_only_op;
2577
	spin_lock(&nn->client_lock);
2578
	session = find_in_sessionid_hashtbl(&bcts->sessionid, net, &status);
2579
	spin_unlock(&nn->client_lock);
2580
	if (!session)
2581
		goto out_no_session;
2582 2583 2584
	status = nfserr_wrong_cred;
	if (!mach_creds_match(session->se_client, rqstp))
		goto out;
2585
	status = nfsd4_map_bcts_dir(&bcts->dir);
2586
	if (status)
2587
		goto out;
2588
	conn = alloc_conn(rqstp, bcts->dir);
2589
	status = nfserr_jukebox;
2590
	if (!conn)
2591 2592 2593 2594
		goto out;
	nfsd4_init_conn(rqstp, conn, session);
	status = nfs_ok;
out:
2595 2596
	nfsd4_put_session(session);
out_no_session:
2597
	return status;
2598 2599
}

2600 2601 2602 2603 2604 2605 2606
static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid)
{
	if (!session)
		return 0;
	return !memcmp(sid, &session->se_sessionid, sizeof(*sid));
}

A
Andy Adamson 已提交
2607 2608 2609 2610 2611
__be32
nfsd4_destroy_session(struct svc_rqst *r,
		      struct nfsd4_compound_state *cstate,
		      struct nfsd4_destroy_session *sessionid)
{
B
Benny Halevy 已提交
2612
	struct nfsd4_session *ses;
2613
	__be32 status;
2614
	int ref_held_by_me = 0;
2615 2616
	struct net *net = SVC_NET(r);
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
B
Benny Halevy 已提交
2617

2618
	status = nfserr_not_only_op;
2619
	if (nfsd4_compound_in_session(cstate->session, &sessionid->sessionid)) {
2620
		if (!nfsd4_last_compound_op(r))
2621
			goto out;
2622
		ref_held_by_me++;
2623
	}
B
Benny Halevy 已提交
2624
	dump_sessionid(__func__, &sessionid->sessionid);
2625
	spin_lock(&nn->client_lock);
2626
	ses = find_in_sessionid_hashtbl(&sessionid->sessionid, net, &status);
2627 2628
	if (!ses)
		goto out_client_lock;
2629 2630
	status = nfserr_wrong_cred;
	if (!mach_creds_match(ses->se_client, r))
2631
		goto out_put_session;
2632
	status = mark_session_dead_locked(ses, 1 + ref_held_by_me);
2633
	if (status)
2634
		goto out_put_session;
B
Benny Halevy 已提交
2635
	unhash_session(ses);
2636
	spin_unlock(&nn->client_lock);
B
Benny Halevy 已提交
2637

2638
	nfsd4_probe_callback_sync(ses->se_client);
2639

2640
	spin_lock(&nn->client_lock);
B
Benny Halevy 已提交
2641
	status = nfs_ok;
2642
out_put_session:
2643
	nfsd4_put_session_locked(ses);
2644 2645
out_client_lock:
	spin_unlock(&nn->client_lock);
B
Benny Halevy 已提交
2646 2647
out:
	return status;
A
Andy Adamson 已提交
2648 2649
}

2650
static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt, struct nfsd4_session *s)
2651 2652 2653 2654
{
	struct nfsd4_conn *c;

	list_for_each_entry(c, &s->se_conns, cn_persession) {
2655
		if (c->cn_xprt == xpt) {
2656 2657 2658 2659 2660 2661
			return c;
		}
	}
	return NULL;
}

2662
static __be32 nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_session *ses)
2663 2664
{
	struct nfs4_client *clp = ses->se_client;
2665
	struct nfsd4_conn *c;
2666
	__be32 status = nfs_ok;
2667
	int ret;
2668 2669

	spin_lock(&clp->cl_lock);
2670
	c = __nfsd4_find_conn(new->cn_xprt, ses);
2671 2672 2673 2674 2675
	if (c)
		goto out_free;
	status = nfserr_conn_not_bound_to_session;
	if (clp->cl_mach_cred)
		goto out_free;
2676 2677
	__nfsd4_hash_conn(new, ses);
	spin_unlock(&clp->cl_lock);
2678 2679 2680 2681
	ret = nfsd4_register_conn(new);
	if (ret)
		/* oops; xprt is already down: */
		nfsd4_conn_lost(&new->cn_xpt_user);
2682 2683 2684 2685 2686
	return nfs_ok;
out_free:
	spin_unlock(&clp->cl_lock);
	free_conn(new);
	return status;
2687 2688
}

2689 2690 2691 2692 2693 2694 2695
/* Does the compound exceed the session's negotiated max ops? */
static bool nfsd4_session_too_many_ops(struct svc_rqst *rqstp, struct nfsd4_session *session)
{
	struct nfsd4_compoundargs *args = rqstp->rq_argp;

	return args->opcnt > session->se_fchannel.maxops;
}

M
Mi Jinlong 已提交
2696 2697 2698 2699 2700 2701 2702 2703
/* Does the request exceed the session's negotiated max request size? */
static bool nfsd4_request_too_big(struct svc_rqst *rqstp,
				  struct nfsd4_session *session)
{
	struct xdr_buf *xb = &rqstp->rq_arg;

	return xb->len > session->se_fchannel.maxreq_sz;
}

A
Andy Adamson 已提交
2704
__be32
B
Benny Halevy 已提交
2705
nfsd4_sequence(struct svc_rqst *rqstp,
A
Andy Adamson 已提交
2706 2707 2708
	       struct nfsd4_compound_state *cstate,
	       struct nfsd4_sequence *seq)
{
2709
	struct nfsd4_compoundres *resp = rqstp->rq_resp;
2710
	struct xdr_stream *xdr = &resp->xdr;
B
Benny Halevy 已提交
2711
	struct nfsd4_session *session;
2712
	struct nfs4_client *clp;
B
Benny Halevy 已提交
2713
	struct nfsd4_slot *slot;
2714
	struct nfsd4_conn *conn;
J
J. Bruce Fields 已提交
2715
	__be32 status;
2716
	int buflen;
2717 2718
	struct net *net = SVC_NET(rqstp);
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
B
Benny Halevy 已提交
2719

2720 2721 2722
	if (resp->opcnt != 1)
		return nfserr_sequence_pos;

2723 2724 2725 2726 2727 2728 2729 2730
	/*
	 * Will be either used or freed by nfsd4_sequence_check_conn
	 * below.
	 */
	conn = alloc_conn(rqstp, NFS4_CDFC4_FORE);
	if (!conn)
		return nfserr_jukebox;

2731
	spin_lock(&nn->client_lock);
2732
	session = find_in_sessionid_hashtbl(&seq->sessionid, net, &status);
B
Benny Halevy 已提交
2733
	if (!session)
2734 2735
		goto out_no_session;
	clp = session->se_client;
B
Benny Halevy 已提交
2736

2737 2738
	status = nfserr_too_many_ops;
	if (nfsd4_session_too_many_ops(rqstp, session))
2739
		goto out_put_session;
2740

M
Mi Jinlong 已提交
2741 2742
	status = nfserr_req_too_big;
	if (nfsd4_request_too_big(rqstp, session))
2743
		goto out_put_session;
M
Mi Jinlong 已提交
2744

B
Benny Halevy 已提交
2745
	status = nfserr_badslot;
2746
	if (seq->slotid >= session->se_fchannel.maxreqs)
2747
		goto out_put_session;
B
Benny Halevy 已提交
2748

2749
	slot = session->se_slots[seq->slotid];
B
Benny Halevy 已提交
2750 2751
	dprintk("%s: slotid %d\n", __func__, seq->slotid);

2752 2753 2754 2755 2756
	/* We do not negotiate the number of slots yet, so set the
	 * maxslots to the session maxreqs which is used to encode
	 * sr_highest_slotid and the sr_target_slot id to maxslots */
	seq->maxslots = session->se_fchannel.maxreqs;

2757 2758
	status = check_slot_seqid(seq->seqid, slot->sl_seqid,
					slot->sl_flags & NFSD4_SLOT_INUSE);
B
Benny Halevy 已提交
2759
	if (status == nfserr_replay_cache) {
2760 2761
		status = nfserr_seq_misordered;
		if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
2762
			goto out_put_session;
B
Benny Halevy 已提交
2763 2764
		cstate->slot = slot;
		cstate->session = session;
2765
		cstate->clp = clp;
A
Andy Adamson 已提交
2766
		/* Return the cached reply status and set cstate->status
2767
		 * for nfsd4_proc_compound processing */
2768
		status = nfsd4_replay_cache_entry(resp, seq);
A
Andy Adamson 已提交
2769
		cstate->status = nfserr_replay_cache;
2770
		goto out;
B
Benny Halevy 已提交
2771 2772
	}
	if (status)
2773
		goto out_put_session;
B
Benny Halevy 已提交
2774

2775
	status = nfsd4_sequence_check_conn(conn, session);
2776
	conn = NULL;
2777 2778
	if (status)
		goto out_put_session;
2779

2780 2781 2782 2783 2784
	buflen = (seq->cachethis) ?
			session->se_fchannel.maxresp_cached :
			session->se_fchannel.maxresp_sz;
	status = (seq->cachethis) ? nfserr_rep_too_big_to_cache :
				    nfserr_rep_too_big;
2785
	if (xdr_restrict_buflen(xdr, buflen - rqstp->rq_auth_slack))
2786
		goto out_put_session;
2787
	svc_reserve(rqstp, buflen);
2788 2789

	status = nfs_ok;
B
Benny Halevy 已提交
2790 2791
	/* Success! bump slot seqid */
	slot->sl_seqid = seq->seqid;
2792
	slot->sl_flags |= NFSD4_SLOT_INUSE;
2793 2794
	if (seq->cachethis)
		slot->sl_flags |= NFSD4_SLOT_CACHETHIS;
2795 2796
	else
		slot->sl_flags &= ~NFSD4_SLOT_CACHETHIS;
B
Benny Halevy 已提交
2797 2798 2799

	cstate->slot = slot;
	cstate->session = session;
2800
	cstate->clp = clp;
B
Benny Halevy 已提交
2801 2802

out:
2803 2804 2805 2806 2807 2808 2809 2810 2811
	switch (clp->cl_cb_state) {
	case NFSD4_CB_DOWN:
		seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN;
		break;
	case NFSD4_CB_FAULT:
		seq->status_flags = SEQ4_STATUS_BACKCHANNEL_FAULT;
		break;
	default:
		seq->status_flags = 0;
2812
	}
2813 2814
	if (!list_empty(&clp->cl_revoked))
		seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED;
2815
out_no_session:
2816 2817
	if (conn)
		free_conn(conn);
2818
	spin_unlock(&nn->client_lock);
B
Benny Halevy 已提交
2819
	return status;
2820
out_put_session:
2821
	nfsd4_put_session_locked(session);
2822
	goto out_no_session;
A
Andy Adamson 已提交
2823 2824
}

2825 2826 2827 2828 2829 2830 2831 2832 2833 2834
/*
 * Compound post-processing: cache the reply in the session slot (unless
 * this was itself a replay) and release the session or client reference
 * taken when the compound started.
 */
void
nfsd4_sequence_done(struct nfsd4_compoundres *resp)
{
	struct nfsd4_compound_state *cs = &resp->cstate;

	if (nfsd4_has_session(cs)) {
		if (cs->status != nfserr_replay_cache) {
			nfsd4_store_cache_entry(resp);
			cs->slot->sl_flags &= ~NFSD4_SLOT_INUSE;
		}
		/* Drop session reference that was taken in nfsd4_sequence() */
		nfsd4_put_session(cs->session);
	} else if (cs->clp)
		put_client_renew(cs->clp);
}

2841 2842 2843
__be32
nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_destroy_clientid *dc)
{
2844 2845
	struct nfs4_client *conf, *unconf;
	struct nfs4_client *clp = NULL;
J
J. Bruce Fields 已提交
2846
	__be32 status = 0;
2847
	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
2848

2849
	spin_lock(&nn->client_lock);
2850
	unconf = find_unconfirmed_client(&dc->clientid, true, nn);
2851
	conf = find_confirmed_client(&dc->clientid, true, nn);
2852
	WARN_ON_ONCE(conf && unconf);
2853 2854

	if (conf) {
2855
		if (client_has_state(conf)) {
2856 2857 2858
			status = nfserr_clientid_busy;
			goto out;
		}
2859 2860 2861
		status = mark_client_expired_locked(conf);
		if (status)
			goto out;
2862
		clp = conf;
2863 2864 2865 2866 2867 2868
	} else if (unconf)
		clp = unconf;
	else {
		status = nfserr_stale_clientid;
		goto out;
	}
2869
	if (!mach_creds_match(clp, rqstp)) {
2870
		clp = NULL;
2871 2872 2873
		status = nfserr_wrong_cred;
		goto out;
	}
2874
	unhash_client_locked(clp);
2875
out:
2876 2877 2878
	spin_unlock(&nn->client_lock);
	if (clp)
		expire_client(clp);
2879 2880 2881
	return status;
}

2882 2883 2884
__be32
nfsd4_reclaim_complete(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_reclaim_complete *rc)
{
J
J. Bruce Fields 已提交
2885
	__be32 status = 0;
2886

2887 2888 2889 2890 2891 2892 2893 2894 2895
	if (rc->rca_one_fs) {
		if (!cstate->current_fh.fh_dentry)
			return nfserr_nofilehandle;
		/*
		 * We don't take advantage of the rca_one_fs case.
		 * That's OK, it's optional, we can safely ignore it.
		 */
		 return nfs_ok;
	}
2896 2897

	status = nfserr_complete_already;
2898 2899
	if (test_and_set_bit(NFSD4_CLIENT_RECLAIM_COMPLETE,
			     &cstate->session->se_client->cl_flags))
2900 2901 2902 2903
		goto out;

	status = nfserr_stale_clientid;
	if (is_client_expired(cstate->session->se_client))
2904 2905 2906 2907 2908 2909 2910
		/*
		 * The following error isn't really legal.
		 * But we only get here if the client just explicitly
		 * destroyed the client.  Surely it no longer cares what
		 * error it gets back on an operation for the dead
		 * client.
		 */
2911 2912 2913
		goto out;

	status = nfs_ok;
2914
	nfsd4_client_record_create(cstate->session->se_client);
2915 2916
out:
	return status;
2917 2918
}

2919
__be32
2920 2921
nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
		  struct nfsd4_setclientid *setclid)
L
Linus Torvalds 已提交
2922
{
2923
	struct xdr_netobj 	clname = setclid->se_name;
L
Linus Torvalds 已提交
2924
	nfs4_verifier		clverifier = setclid->se_verf;
2925 2926
	struct nfs4_client	*conf, *new;
	struct nfs4_client	*unconf = NULL;
2927
	__be32 			status;
2928 2929
	struct nfsd_net		*nn = net_generic(SVC_NET(rqstp), nfsd_net_id);

2930 2931 2932
	new = create_client(clname, rqstp, &clverifier);
	if (new == NULL)
		return nfserr_jukebox;
2933
	/* Cases below refer to rfc 3530 section 14.2.33: */
2934
	spin_lock(&nn->client_lock);
2935
	conf = find_confirmed_client_by_name(&clname, nn);
2936
	if (conf) {
2937
		/* case 0: */
L
Linus Torvalds 已提交
2938
		status = nfserr_clid_inuse;
2939 2940
		if (clp_used_exchangeid(conf))
			goto out;
2941
		if (!same_creds(&conf->cl_cred, &rqstp->rq_cred)) {
2942 2943 2944 2945 2946
			char addr_str[INET6_ADDRSTRLEN];
			rpc_ntop((struct sockaddr *) &conf->cl_addr, addr_str,
				 sizeof(addr_str));
			dprintk("NFSD: setclientid: string in use by client "
				"at %s\n", addr_str);
L
Linus Torvalds 已提交
2947 2948 2949
			goto out;
		}
	}
2950
	unconf = find_unconfirmed_client_by_name(&clname, nn);
2951
	if (unconf)
2952
		unhash_client_locked(unconf);
2953
	if (conf && same_verf(&conf->cl_verifier, &clverifier))
2954
		/* case 1: probable callback update */
L
Linus Torvalds 已提交
2955
		copy_clid(new, conf);
2956
	else /* case 4 (new client) or cases 2, 3 (client reboot): */
2957
		gen_clid(new, nn);
2958
	new->cl_minorversion = 0;
2959
	gen_callback(new, setclid, rqstp);
2960
	add_to_unconfirmed(new);
L
Linus Torvalds 已提交
2961 2962 2963
	setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot;
	setclid->se_clientid.cl_id = new->cl_clientid.cl_id;
	memcpy(setclid->se_confirm.data, new->cl_confirm.data, sizeof(setclid->se_confirm.data));
2964
	new = NULL;
L
Linus Torvalds 已提交
2965 2966
	status = nfs_ok;
out:
2967
	spin_unlock(&nn->client_lock);
2968 2969
	if (new)
		free_client(new);
2970 2971
	if (unconf)
		expire_client(unconf);
L
Linus Torvalds 已提交
2972 2973 2974 2975
	return status;
}


2976
__be32
2977 2978 2979
nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
			 struct nfsd4_compound_state *cstate,
			 struct nfsd4_setclientid_confirm *setclientid_confirm)
L
Linus Torvalds 已提交
2980
{
2981
	struct nfs4_client *conf, *unconf;
2982
	struct nfs4_client *old = NULL;
L
Linus Torvalds 已提交
2983 2984
	nfs4_verifier confirm = setclientid_confirm->sc_confirm; 
	clientid_t * clid = &setclientid_confirm->sc_clientid;
2985
	__be32 status;
2986
	struct nfsd_net	*nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
L
Linus Torvalds 已提交
2987

2988
	if (STALE_CLIENTID(clid, nn))
L
Linus Torvalds 已提交
2989
		return nfserr_stale_clientid;
2990

2991
	spin_lock(&nn->client_lock);
2992
	conf = find_confirmed_client(clid, false, nn);
2993
	unconf = find_unconfirmed_client(clid, false, nn);
2994
	/*
2995 2996 2997 2998
	 * We try hard to give out unique clientid's, so if we get an
	 * attempt to confirm the same clientid with a different cred,
	 * there's a bug somewhere.  Let's charitably assume it's our
	 * bug.
2999
	 */
3000 3001 3002 3003 3004
	status = nfserr_serverfault;
	if (unconf && !same_creds(&unconf->cl_cred, &rqstp->rq_cred))
		goto out;
	if (conf && !same_creds(&conf->cl_cred, &rqstp->rq_cred))
		goto out;
3005
	/* cases below refer to rfc 3530 section 14.2.34: */
3006 3007
	if (!unconf || !same_verf(&confirm, &unconf->cl_confirm)) {
		if (conf && !unconf) /* case 2: probable retransmit */
L
Linus Torvalds 已提交
3008
			status = nfs_ok;
3009 3010 3011 3012 3013 3014
		else /* case 4: client hasn't noticed we rebooted yet? */
			status = nfserr_stale_clientid;
		goto out;
	}
	status = nfs_ok;
	if (conf) { /* case 1: callback update */
3015 3016
		old = unconf;
		unhash_client_locked(old);
3017
		nfsd4_change_callback(conf, &unconf->cl_cb_conn);
3018
	} else { /* case 3: normal case; new or rebooted client */
3019 3020 3021
		old = find_confirmed_client_by_name(&unconf->cl_name, nn);
		if (old) {
			status = mark_client_expired_locked(old);
3022 3023
			if (status) {
				old = NULL;
3024
				goto out;
3025
			}
3026
		}
3027
		move_to_confirmed(unconf);
3028
		conf = unconf;
3029
	}
3030 3031 3032 3033 3034
	get_client_locked(conf);
	spin_unlock(&nn->client_lock);
	nfsd4_probe_callback(conf);
	spin_lock(&nn->client_lock);
	put_client_renew_locked(conf);
L
Linus Torvalds 已提交
3035
out:
3036 3037 3038
	spin_unlock(&nn->client_lock);
	if (old)
		expire_client(old);
L
Linus Torvalds 已提交
3039 3040 3041
	return status;
}

3042 3043 3044 3045 3046
/* Allocate an uninitialized nfs4_file from its slab cache; may sleep. */
static struct nfs4_file *nfsd4_alloc_file(void)
{
	struct nfs4_file *fp = kmem_cache_alloc(file_slab, GFP_KERNEL);

	return fp;
}

L
Linus Torvalds 已提交
3047
/* OPEN Share state helper functions */
3048
static void nfsd4_init_file(struct nfs4_file *fp, struct knfsd_fh *fh)
L
Linus Torvalds 已提交
3049
{
3050
	unsigned int hashval = file_hashval(fh);
L
Linus Torvalds 已提交
3051

3052 3053
	lockdep_assert_held(&state_lock);

3054
	atomic_set(&fp->fi_ref, 1);
3055
	spin_lock_init(&fp->fi_lock);
3056 3057
	INIT_LIST_HEAD(&fp->fi_stateids);
	INIT_LIST_HEAD(&fp->fi_delegations);
3058
	fh_copy_shallow(&fp->fi_fhandle, fh);
3059 3060
	fp->fi_had_conflict = false;
	fp->fi_lease = NULL;
3061
	fp->fi_share_deny = 0;
3062 3063
	memset(fp->fi_fds, 0, sizeof(fp->fi_fds));
	memset(fp->fi_access, 0, sizeof(fp->fi_access));
3064
	hlist_add_head(&fp->fi_hash, &file_hashtbl[hashval]);
L
Linus Torvalds 已提交
3065 3066
}

3067
void
L
Linus Torvalds 已提交
3068 3069
nfsd4_free_slabs(void)
{
C
Christoph Hellwig 已提交
3070 3071 3072 3073 3074
	kmem_cache_destroy(openowner_slab);
	kmem_cache_destroy(lockowner_slab);
	kmem_cache_destroy(file_slab);
	kmem_cache_destroy(stateid_slab);
	kmem_cache_destroy(deleg_slab);
N
NeilBrown 已提交
3075
}
L
Linus Torvalds 已提交
3076

3077
int
N
NeilBrown 已提交
3078 3079
nfsd4_init_slabs(void)
{
3080 3081 3082
	openowner_slab = kmem_cache_create("nfsd4_openowners",
			sizeof(struct nfs4_openowner), 0, 0, NULL);
	if (openowner_slab == NULL)
C
Christoph Hellwig 已提交
3083
		goto out;
3084
	lockowner_slab = kmem_cache_create("nfsd4_lockowners",
3085
			sizeof(struct nfs4_lockowner), 0, 0, NULL);
3086
	if (lockowner_slab == NULL)
C
Christoph Hellwig 已提交
3087
		goto out_free_openowner_slab;
N
NeilBrown 已提交
3088
	file_slab = kmem_cache_create("nfsd4_files",
3089
			sizeof(struct nfs4_file), 0, 0, NULL);
N
NeilBrown 已提交
3090
	if (file_slab == NULL)
C
Christoph Hellwig 已提交
3091
		goto out_free_lockowner_slab;
N
NeilBrown 已提交
3092
	stateid_slab = kmem_cache_create("nfsd4_stateids",
3093
			sizeof(struct nfs4_ol_stateid), 0, 0, NULL);
N
NeilBrown 已提交
3094
	if (stateid_slab == NULL)
C
Christoph Hellwig 已提交
3095
		goto out_free_file_slab;
N
NeilBrown 已提交
3096
	deleg_slab = kmem_cache_create("nfsd4_delegations",
3097
			sizeof(struct nfs4_delegation), 0, 0, NULL);
N
NeilBrown 已提交
3098
	if (deleg_slab == NULL)
C
Christoph Hellwig 已提交
3099
		goto out_free_stateid_slab;
N
NeilBrown 已提交
3100
	return 0;
C
Christoph Hellwig 已提交
3101 3102 3103 3104 3105 3106 3107 3108 3109 3110

out_free_stateid_slab:
	kmem_cache_destroy(stateid_slab);
out_free_file_slab:
	kmem_cache_destroy(file_slab);
out_free_lockowner_slab:
	kmem_cache_destroy(lockowner_slab);
out_free_openowner_slab:
	kmem_cache_destroy(openowner_slab);
out:
N
NeilBrown 已提交
3111 3112
	dprintk("nfsd4: out of memory while initializing nfsv4\n");
	return -ENOMEM;
L
Linus Torvalds 已提交
3113 3114
}

3115
static void init_nfs4_replay(struct nfs4_replay *rp)
L
Linus Torvalds 已提交
3116
{
3117 3118 3119
	rp->rp_status = nfserr_serverfault;
	rp->rp_buflen = 0;
	rp->rp_buf = rp->rp_ibuf;
3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141
	mutex_init(&rp->rp_mutex);
}

static void nfsd4_cstate_assign_replay(struct nfsd4_compound_state *cstate,
		struct nfs4_stateowner *so)
{
	if (!nfsd4_has_session(cstate)) {
		mutex_lock(&so->so_replay.rp_mutex);
		cstate->replay_owner = so;
		atomic_inc(&so->so_count);
	}
}

void nfsd4_cstate_clear_replay(struct nfsd4_compound_state *cstate)
{
	struct nfs4_stateowner *so = cstate->replay_owner;

	if (so != NULL) {
		cstate->replay_owner = NULL;
		mutex_unlock(&so->so_replay.rp_mutex);
		nfs4_put_stateowner(so);
	}
L
Linus Torvalds 已提交
3142 3143
}

3144
static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj *owner, struct nfs4_client *clp)
3145
{
L
Linus Torvalds 已提交
3146 3147
	struct nfs4_stateowner *sop;

3148
	sop = kmem_cache_alloc(slab, GFP_KERNEL);
3149 3150 3151 3152 3153
	if (!sop)
		return NULL;

	sop->so_owner.data = kmemdup(owner->data, owner->len, GFP_KERNEL);
	if (!sop->so_owner.data) {
3154
		kmem_cache_free(slab, sop);
L
Linus Torvalds 已提交
3155
		return NULL;
3156 3157 3158
	}
	sop->so_owner.len = owner->len;

3159
	INIT_LIST_HEAD(&sop->so_stateids);
3160 3161
	sop->so_client = clp;
	init_nfs4_replay(&sop->so_replay);
3162
	atomic_set(&sop->so_count, 1);
3163 3164 3165
	return sop;
}

3166
static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, unsigned int strhashval)
3167
{
3168
	lockdep_assert_held(&clp->cl_lock);
3169

3170 3171
	list_add(&oo->oo_owner.so_strhash,
		 &clp->cl_ownerstr_hashtbl[strhashval]);
3172
	list_add(&oo->oo_perclient, &clp->cl_openowners);
3173 3174
}

3175 3176
static void nfs4_unhash_openowner(struct nfs4_stateowner *so)
{
3177
	unhash_openowner_locked(openowner(so));
3178 3179
}

3180 3181 3182 3183 3184 3185 3186 3187
static void nfs4_free_openowner(struct nfs4_stateowner *so)
{
	struct nfs4_openowner *oo = openowner(so);

	kmem_cache_free(openowner_slab, oo);
}

static const struct nfs4_stateowner_operations openowner_ops = {
3188 3189
	.so_unhash =	nfs4_unhash_openowner,
	.so_free =	nfs4_free_openowner,
3190 3191
};

3192
static struct nfs4_openowner *
3193
alloc_init_open_stateowner(unsigned int strhashval, struct nfsd4_open *open,
3194 3195
			   struct nfsd4_compound_state *cstate)
{
3196
	struct nfs4_client *clp = cstate->clp;
3197
	struct nfs4_openowner *oo, *ret;
3198

3199 3200
	oo = alloc_stateowner(openowner_slab, &open->op_owner, clp);
	if (!oo)
3201
		return NULL;
3202
	oo->oo_owner.so_ops = &openowner_ops;
3203 3204
	oo->oo_owner.so_is_open_owner = 1;
	oo->oo_owner.so_seqid = open->op_seqid;
3205
	oo->oo_flags = 0;
3206 3207
	if (nfsd4_has_session(cstate))
		oo->oo_flags |= NFS4_OO_CONFIRMED;
3208
	oo->oo_time = 0;
3209
	oo->oo_last_closed_stid = NULL;
3210
	INIT_LIST_HEAD(&oo->oo_close_lru);
3211 3212
	spin_lock(&clp->cl_lock);
	ret = find_openstateowner_str_locked(strhashval, open, clp);
3213 3214 3215 3216 3217
	if (ret == NULL) {
		hash_openowner(oo, clp, strhashval);
		ret = oo;
	} else
		nfs4_free_openowner(&oo->oo_owner);
3218
	spin_unlock(&clp->cl_lock);
3219
	return oo;
L
Linus Torvalds 已提交
3220 3221
}

3222
static void init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp, struct nfsd4_open *open) {
3223
	struct nfs4_openowner *oo = open->op_openowner;
L
Linus Torvalds 已提交
3224

3225
	atomic_inc(&stp->st_stid.sc_count);
J
J. Bruce Fields 已提交
3226
	stp->st_stid.sc_type = NFS4_OPEN_STID;
3227
	INIT_LIST_HEAD(&stp->st_locks);
3228
	stp->st_stateowner = &oo->oo_owner;
3229
	atomic_inc(&stp->st_stateowner->so_count);
3230
	get_nfs4_file(fp);
3231
	stp->st_stid.sc_file = fp;
L
Linus Torvalds 已提交
3232 3233
	stp->st_access_bmap = 0;
	stp->st_deny_bmap = 0;
3234
	stp->st_openstp = NULL;
3235 3236
	spin_lock(&oo->oo_owner.so_client->cl_lock);
	list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
3237 3238 3239
	spin_lock(&fp->fi_lock);
	list_add(&stp->st_perfile, &fp->fi_stateids);
	spin_unlock(&fp->fi_lock);
3240
	spin_unlock(&oo->oo_owner.so_client->cl_lock);
L
Linus Torvalds 已提交
3241 3242
}

3243 3244 3245 3246 3247
/*
 * In the 4.0 case we need to keep the owners around a little while to handle
 * CLOSE replay. We still do need to release any file access that is held by
 * them before returning however.
 */
3248
static void
3249
move_to_close_lru(struct nfs4_ol_stateid *s, struct net *net)
L
Linus Torvalds 已提交
3250
{
3251
	struct nfs4_ol_stateid *last;
3252 3253 3254
	struct nfs4_openowner *oo = openowner(s->st_stateowner);
	struct nfsd_net *nn = net_generic(s->st_stid.sc_client->net,
						nfsd_net_id);
3255

3256
	dprintk("NFSD: move_to_close_lru nfs4_openowner %p\n", oo);
L
Linus Torvalds 已提交
3257

3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268
	/*
	 * We know that we hold one reference via nfsd4_close, and another
	 * "persistent" reference for the client. If the refcount is higher
	 * than 2, then there are still calls in progress that are using this
	 * stateid. We can't put the sc_file reference until they are finished.
	 * Wait for the refcount to drop to 2. Since it has been unhashed,
	 * there should be no danger of the refcount going back up again at
	 * this point.
	 */
	wait_event(close_wq, atomic_read(&s->st_stid.sc_count) == 2);

3269 3270 3271 3272 3273
	release_all_access(s);
	if (s->st_stid.sc_file) {
		put_nfs4_file(s->st_stid.sc_file);
		s->st_stid.sc_file = NULL;
	}
3274 3275 3276

	spin_lock(&nn->client_lock);
	last = oo->oo_last_closed_stid;
3277
	oo->oo_last_closed_stid = s;
3278
	list_move_tail(&oo->oo_close_lru, &nn->close_lru);
3279
	oo->oo_time = get_seconds();
3280 3281 3282
	spin_unlock(&nn->client_lock);
	if (last)
		nfs4_put_stid(&last->st_stid);
L
Linus Torvalds 已提交
3283 3284 3285 3286
}

/* search file_hashtbl[] for file */
static struct nfs4_file *
3287
find_file_locked(struct knfsd_fh *fh)
L
Linus Torvalds 已提交
3288
{
3289
	unsigned int hashval = file_hashval(fh);
L
Linus Torvalds 已提交
3290 3291
	struct nfs4_file *fp;

3292 3293
	lockdep_assert_held(&state_lock);

3294
	hlist_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) {
3295
		if (nfsd_fh_match(&fp->fi_fhandle, fh)) {
3296
			get_nfs4_file(fp);
L
Linus Torvalds 已提交
3297
			return fp;
3298
		}
L
Linus Torvalds 已提交
3299 3300 3301 3302
	}
	return NULL;
}

3303
static struct nfs4_file *
3304
find_file(struct knfsd_fh *fh)
3305 3306 3307 3308
{
	struct nfs4_file *fp;

	spin_lock(&state_lock);
3309
	fp = find_file_locked(fh);
3310 3311 3312 3313 3314
	spin_unlock(&state_lock);
	return fp;
}

static struct nfs4_file *
3315
find_or_add_file(struct nfs4_file *new, struct knfsd_fh *fh)
3316 3317 3318 3319
{
	struct nfs4_file *fp;

	spin_lock(&state_lock);
3320
	fp = find_file_locked(fh);
3321
	if (fp == NULL) {
3322
		nfsd4_init_file(new, fh);
3323 3324 3325 3326 3327 3328 3329
		fp = new;
	}
	spin_unlock(&state_lock);

	return fp;
}

L
Linus Torvalds 已提交
3330 3331 3332 3333
/*
 * Called to check deny when READ with all zero stateid or
 * WRITE with all zero or all one stateid
 */
3334
static __be32
L
Linus Torvalds 已提交
3335 3336 3337
nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
{
	struct nfs4_file *fp;
3338
	__be32 ret = nfs_ok;
L
Linus Torvalds 已提交
3339

3340
	fp = find_file(&current_fh->fh_handle);
3341
	if (!fp)
3342 3343
		return ret;
	/* Check for conflicting share reservations */
3344
	spin_lock(&fp->fi_lock);
3345 3346
	if (fp->fi_share_deny & deny_type)
		ret = nfserr_locked;
3347
	spin_unlock(&fp->fi_lock);
3348 3349
	put_nfs4_file(fp);
	return ret;
L
Linus Torvalds 已提交
3350 3351
}

3352
void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp)
L
Linus Torvalds 已提交
3353
{
3354 3355
	struct nfsd_net *nn = net_generic(dp->dl_stid.sc_client->net,
					  nfsd_net_id);
3356

3357
	block_delegations(&dp->dl_stid.sc_file->fi_fhandle);
3358

3359
	/*
3360 3361 3362
	 * We can't do this in nfsd_break_deleg_cb because it is
	 * already holding inode->i_lock.
	 *
3363 3364 3365
	 * If the dl_time != 0, then we know that it has already been
	 * queued for a lease break. Don't queue it again.
	 */
3366
	spin_lock(&state_lock);
3367 3368
	if (dp->dl_time == 0) {
		dp->dl_time = get_seconds();
3369
		list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru);
3370
	}
3371 3372
	spin_unlock(&state_lock);
}
L
Linus Torvalds 已提交
3373

3374 3375 3376 3377 3378 3379 3380 3381 3382
static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
{
	/*
	 * We're assuming the state code never drops its reference
	 * without first removing the lease.  Since we're in this lease
	 * callback (and since the lease code is serialized by the kernel
	 * lock) we know the server hasn't removed the lease yet, we know
	 * it's safe to take a reference.
	 */
3383
	atomic_inc(&dp->dl_stid.sc_count);
3384 3385 3386
	nfsd4_cb_recall(dp);
}

3387
/* Called from break_lease() with i_lock held. */
3388 3389
static void nfsd_break_deleg_cb(struct file_lock *fl)
{
3390 3391
	struct nfs4_file *fp = (struct nfs4_file *)fl->fl_owner;
	struct nfs4_delegation *dp;
3392

3393 3394 3395 3396 3397 3398 3399 3400
	if (!fp) {
		WARN(1, "(%p)->fl_owner NULL\n", fl);
		return;
	}
	if (fp->fi_had_conflict) {
		WARN(1, "duplicate break on %p\n", fp);
		return;
	}
3401 3402
	/*
	 * We don't want the locks code to timeout the lease for us;
3403
	 * we'll remove it ourself if a delegation isn't returned
3404
	 * in time:
3405 3406
	 */
	fl->fl_break_time = 0;
L
Linus Torvalds 已提交
3407

3408
	spin_lock(&fp->fi_lock);
3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420
	fp->fi_had_conflict = true;
	/*
	 * If there are no delegations on the list, then we can't count on this
	 * lease ever being cleaned up. Set the fl_break_time to jiffies so that
	 * time_out_leases will do it ASAP. The fact that fi_had_conflict is now
	 * true should keep any new delegations from being hashed.
	 */
	if (list_empty(&fp->fi_delegations))
		fl->fl_break_time = jiffies;
	else
		list_for_each_entry(dp, &fp->fi_delegations, dl_perfile)
			nfsd_break_one_deleg(dp);
3421
	spin_unlock(&fp->fi_lock);
L
Linus Torvalds 已提交
3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432
}

static
int nfsd_change_deleg_cb(struct file_lock **onlist, int arg)
{
	if (arg & F_UNLCK)
		return lease_modify(onlist, arg);
	else
		return -EAGAIN;
}

3433
static const struct lock_manager_operations nfsd_lease_mng_ops = {
J
J. Bruce Fields 已提交
3434 3435
	.lm_break = nfsd_break_deleg_cb,
	.lm_change = nfsd_change_deleg_cb,
L
Linus Torvalds 已提交
3436 3437
};

3438 3439 3440 3441 3442 3443 3444 3445 3446 3447
/*
 * v4.0 seqid check for a stateowner: the previous seqid means a replayed
 * request, the current one is in order, anything else is a protocol error.
 * Sessions (v4.1+) do their own replay detection, so always OK there.
 */
static __be32 nfsd4_check_seqid(struct nfsd4_compound_state *cstate, struct nfs4_stateowner *so, u32 seqid)
{
	if (nfsd4_has_session(cstate))
		return nfs_ok;
	if (seqid == so->so_seqid - 1)
		return nfserr_replay_me;
	if (seqid == so->so_seqid)
		return nfs_ok;
	return nfserr_bad_seqid;
}
L
Linus Torvalds 已提交
3448

3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470
/*
 * Resolve @clid to a confirmed client and cache it in @cstate->clp.
 * If a client is already cached (v4.1+ SEQUENCE did it), just verify the
 * id matches.  On success the cached client holds an extra refcount.
 */
static __be32 lookup_clientid(clientid_t *clid,
		struct nfsd4_compound_state *cstate,
		struct nfsd_net *nn)
{
	struct nfs4_client *found;

	if (cstate->clp) {
		found = cstate->clp;
		if (!same_clid(&found->cl_clientid, clid))
			return nfserr_stale_clientid;
		return nfs_ok;
	}

	if (STALE_CLIENTID(clid, nn))
		return nfserr_stale_clientid;

	/*
	 * For v4.1+ we get the client in the SEQUENCE op. If we don't have one
	 * cached already then we know this is for is for v4.0 and "sessions"
	 * will be false.
	 */
	WARN_ON_ONCE(cstate->session);
	spin_lock(&nn->client_lock);
	found = find_confirmed_client(clid, false, nn);
	if (!found) {
		spin_unlock(&nn->client_lock);
		return nfserr_expired;
	}
	atomic_inc(&found->cl_refcount);
	spin_unlock(&nn->client_lock);

	/* Cache the nfs4_client in cstate! */
	cstate->clp = found;
	return nfs_ok;
}

3485
__be32
A
Andy Adamson 已提交
3486
nfsd4_process_open1(struct nfsd4_compound_state *cstate,
3487
		    struct nfsd4_open *open, struct nfsd_net *nn)
L
Linus Torvalds 已提交
3488 3489 3490 3491
{
	clientid_t *clientid = &open->op_clientid;
	struct nfs4_client *clp = NULL;
	unsigned int strhashval;
3492
	struct nfs4_openowner *oo = NULL;
3493
	__be32 status;
L
Linus Torvalds 已提交
3494

3495
	if (STALE_CLIENTID(&open->op_clientid, nn))
L
Linus Torvalds 已提交
3496
		return nfserr_stale_clientid;
3497 3498 3499 3500 3501 3502 3503
	/*
	 * In case we need it later, after we've already created the
	 * file and don't want to risk a further failure:
	 */
	open->op_file = nfsd4_alloc_file();
	if (open->op_file == NULL)
		return nfserr_jukebox;
L
Linus Torvalds 已提交
3504

3505 3506 3507 3508 3509
	status = lookup_clientid(clientid, cstate, nn);
	if (status)
		return status;
	clp = cstate->clp;

3510 3511
	strhashval = ownerstr_hashval(&open->op_owner);
	oo = find_openstateowner_str(strhashval, open, clp);
3512 3513
	open->op_openowner = oo;
	if (!oo) {
3514
		goto new_owner;
L
Linus Torvalds 已提交
3515
	}
3516
	if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) {
3517
		/* Replace unconfirmed owners without checking for replay. */
3518 3519
		release_openowner(oo);
		open->op_openowner = NULL;
3520
		goto new_owner;
3521
	}
3522 3523 3524 3525
	status = nfsd4_check_seqid(cstate, &oo->oo_owner, open->op_seqid);
	if (status)
		return status;
	goto alloc_stateid;
3526
new_owner:
3527
	oo = alloc_init_open_stateowner(strhashval, open, cstate);
3528 3529 3530
	if (oo == NULL)
		return nfserr_jukebox;
	open->op_openowner = oo;
3531
alloc_stateid:
3532
	open->op_stp = nfs4_alloc_open_stateid(clp);
3533 3534
	if (!open->op_stp)
		return nfserr_jukebox;
3535
	return nfs_ok;
L
Linus Torvalds 已提交
3536 3537
}

3538
static inline __be32
N
NeilBrown 已提交
3539 3540 3541 3542 3543 3544 3545 3546
nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
{
	if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ))
		return nfserr_openmode;
	else
		return nfs_ok;
}

3547
static int share_access_to_flags(u32 share_access)
3548
{
3549
	return share_access == NFS4_SHARE_ACCESS_READ ? RD_STATE : WR_STATE;
3550 3551
}

3552
static struct nfs4_delegation *find_deleg_stateid(struct nfs4_client *cl, stateid_t *s)
3553
{
3554
	struct nfs4_stid *ret;
3555

3556
	ret = find_stateid_by_type(cl, s, NFS4_DELEG_STID);
3557 3558 3559
	if (!ret)
		return NULL;
	return delegstateid(ret);
3560 3561
}

3562 3563 3564 3565 3566 3567
static bool nfsd4_is_deleg_cur(struct nfsd4_open *open)
{
	return open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR ||
	       open->op_claim_type == NFS4_OPEN_CLAIM_DELEG_CUR_FH;
}

3568
static __be32
3569
nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
3570 3571 3572
		struct nfs4_delegation **dp)
{
	int flags;
3573
	__be32 status = nfserr_bad_stateid;
3574
	struct nfs4_delegation *deleg;
3575

3576 3577
	deleg = find_deleg_stateid(cl, &open->op_delegate_stateid);
	if (deleg == NULL)
3578
		goto out;
3579
	flags = share_access_to_flags(open->op_share_access);
3580 3581 3582 3583 3584 3585
	status = nfs4_check_delegmode(deleg, flags);
	if (status) {
		nfs4_put_stid(&deleg->dl_stid);
		goto out;
	}
	*dp = deleg;
3586
out:
3587
	if (!nfsd4_is_deleg_cur(open))
3588 3589 3590
		return nfs_ok;
	if (status)
		return status;
3591
	open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
3592
	return nfs_ok;
3593 3594
}

3595 3596
static struct nfs4_ol_stateid *
nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
L
Linus Torvalds 已提交
3597
{
3598
	struct nfs4_ol_stateid *local, *ret = NULL;
3599
	struct nfs4_openowner *oo = open->op_openowner;
L
Linus Torvalds 已提交
3600

3601
	spin_lock(&fp->fi_lock);
3602
	list_for_each_entry(local, &fp->fi_stateids, st_perfile) {
L
Linus Torvalds 已提交
3603 3604 3605
		/* ignore lock owners */
		if (local->st_stateowner->so_is_open_owner == 0)
			continue;
3606
		if (local->st_stateowner == &oo->oo_owner) {
3607
			ret = local;
3608
			atomic_inc(&ret->st_stid.sc_count);
3609
			break;
3610
		}
L
Linus Torvalds 已提交
3611
	}
3612
	spin_unlock(&fp->fi_lock);
3613
	return ret;
L
Linus Torvalds 已提交
3614 3615
}

3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626
static inline int nfs4_access_to_access(u32 nfs4_access)
{
	int flags = 0;

	if (nfs4_access & NFS4_SHARE_ACCESS_READ)
		flags |= NFSD_MAY_READ;
	if (nfs4_access & NFS4_SHARE_ACCESS_WRITE)
		flags |= NFSD_MAY_WRITE;
	return flags;
}

3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641
static inline __be32
nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
		struct nfsd4_open *open)
{
	struct iattr iattr = {
		.ia_valid = ATTR_SIZE,
		.ia_size = 0,
	};
	if (!open->op_truncate)
		return 0;
	if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
		return nfserr_inval;
	return nfsd_setattr(rqstp, fh, &iattr, 0, (time_t)0);
}

3642
static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
3643 3644
		struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp,
		struct nfsd4_open *open)
3645
{
3646
	struct file *filp = NULL;
3647
	__be32 status;
3648 3649
	int oflag = nfs4_access_to_omode(open->op_share_access);
	int access = nfs4_access_to_access(open->op_share_access);
3650
	unsigned char old_access_bmap, old_deny_bmap;
3651

3652
	spin_lock(&fp->fi_lock);
3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679

	/*
	 * Are we trying to set a deny mode that would conflict with
	 * current access?
	 */
	status = nfs4_file_check_deny(fp, open->op_share_deny);
	if (status != nfs_ok) {
		spin_unlock(&fp->fi_lock);
		goto out;
	}

	/* set access to the file */
	status = nfs4_file_get_access(fp, open->op_share_access);
	if (status != nfs_ok) {
		spin_unlock(&fp->fi_lock);
		goto out;
	}

	/* Set access bits in stateid */
	old_access_bmap = stp->st_access_bmap;
	set_access(open->op_share_access, stp);

	/* Set new deny mask */
	old_deny_bmap = stp->st_deny_bmap;
	set_deny(open->op_share_deny, stp);
	fp->fi_share_deny |= (open->op_share_deny & NFS4_SHARE_DENY_BOTH);

3680
	if (!fp->fi_fds[oflag]) {
3681 3682
		spin_unlock(&fp->fi_lock);
		status = nfsd_open(rqstp, cur_fh, S_IFREG, access, &filp);
3683
		if (status)
3684
			goto out_put_access;
3685 3686 3687 3688 3689
		spin_lock(&fp->fi_lock);
		if (!fp->fi_fds[oflag]) {
			fp->fi_fds[oflag] = filp;
			filp = NULL;
		}
3690
	}
3691 3692 3693
	spin_unlock(&fp->fi_lock);
	if (filp)
		fput(filp);
3694

3695 3696 3697 3698 3699
	status = nfsd4_truncate(rqstp, cur_fh, open);
	if (status)
		goto out_put_access;
out:
	return status;
3700 3701 3702 3703 3704
out_put_access:
	stp->st_access_bmap = old_access_bmap;
	nfs4_file_put_access(fp, open->op_share_access);
	reset_union_bmap_deny(bmap_to_share_mode(old_deny_bmap), stp);
	goto out;
L
Linus Torvalds 已提交
3705 3706
}

3707
static __be32
3708
nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *cur_fh, struct nfs4_ol_stateid *stp, struct nfsd4_open *open)
L
Linus Torvalds 已提交
3709
{
3710
	__be32 status;
3711
	unsigned char old_deny_bmap;
L
Linus Torvalds 已提交
3712

3713
	if (!test_access(open->op_share_access, stp))
3714
		return nfs4_get_vfs_file(rqstp, fp, cur_fh, stp, open);
3715

3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727
	/* test and set deny mode */
	spin_lock(&fp->fi_lock);
	status = nfs4_file_check_deny(fp, open->op_share_deny);
	if (status == nfs_ok) {
		old_deny_bmap = stp->st_deny_bmap;
		set_deny(open->op_share_deny, stp);
		fp->fi_share_deny |=
				(open->op_share_deny & NFS4_SHARE_DENY_BOTH);
	}
	spin_unlock(&fp->fi_lock);

	if (status != nfs_ok)
L
Linus Torvalds 已提交
3728 3729
		return status;

3730 3731 3732 3733 3734
	status = nfsd4_truncate(rqstp, cur_fh, open);
	if (status != nfs_ok)
		reset_union_bmap_deny(old_deny_bmap, stp);
	return status;
}
L
Linus Torvalds 已提交
3735 3736

static void
3737
nfs4_set_claim_prev(struct nfsd4_open *open, bool has_session)
L
Linus Torvalds 已提交
3738
{
3739
	open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
L
Linus Torvalds 已提交
3740 3741
}

3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754
/* Should we give out recallable state?: */
static bool nfsd4_cb_channel_good(struct nfs4_client *clp)
{
	if (clp->cl_cb_state == NFSD4_CB_UP)
		return true;
	/*
	 * In the sessions case, since we don't have to establish a
	 * separate connection for callbacks, we assume it's OK
	 * until we hear otherwise:
	 */
	return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN;
}

3755
static struct file_lock *nfs4_alloc_init_lease(struct nfs4_file *fp, int flag)
3756 3757 3758 3759 3760 3761 3762 3763
{
	struct file_lock *fl;

	fl = locks_alloc_lock();
	if (!fl)
		return NULL;
	locks_init_lock(fl);
	fl->fl_lmops = &nfsd_lease_mng_ops;
3764
	fl->fl_flags = FL_DELEG;
3765 3766
	fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
	fl->fl_end = OFFSET_MAX;
3767
	fl->fl_owner = (fl_owner_t)fp;
3768 3769 3770 3771
	fl->fl_pid = current->tgid;
	return fl;
}

3772
static int nfs4_setlease(struct nfs4_delegation *dp)
3773
{
3774
	struct nfs4_file *fp = dp->dl_stid.sc_file;
3775
	struct file_lock *fl;
3776 3777
	struct file *filp;
	int status = 0;
3778

3779
	fl = nfs4_alloc_init_lease(fp, NFS4_OPEN_DELEGATE_READ);
3780 3781
	if (!fl)
		return -ENOMEM;
3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806
	filp = find_readable_file(fp);
	if (!filp) {
		/* We should always have a readable file here */
		WARN_ON_ONCE(1);
		return -EBADF;
	}
	fl->fl_file = filp;
	status = vfs_setlease(filp, fl->fl_type, &fl);
	if (status) {
		locks_free_lock(fl);
		goto out_fput;
	}
	spin_lock(&state_lock);
	spin_lock(&fp->fi_lock);
	/* Did the lease get broken before we took the lock? */
	status = -EAGAIN;
	if (fp->fi_had_conflict)
		goto out_unlock;
	/* Race breaker */
	if (fp->fi_lease) {
		status = 0;
		atomic_inc(&fp->fi_delegees);
		hash_delegation_locked(dp, fp);
		goto out_unlock;
	}
3807
	fp->fi_lease = fl;
3808
	fp->fi_deleg_file = filp;
3809
	atomic_set(&fp->fi_delegees, 1);
3810
	hash_delegation_locked(dp, fp);
3811
	spin_unlock(&fp->fi_lock);
3812
	spin_unlock(&state_lock);
3813
	return 0;
3814 3815 3816 3817 3818
out_unlock:
	spin_unlock(&fp->fi_lock);
	spin_unlock(&state_lock);
out_fput:
	fput(filp);
3819
	return status;
3820 3821
}

J
Jeff Layton 已提交
3822 3823 3824
static struct nfs4_delegation *
nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
		    struct nfs4_file *fp)
3825
{
J
Jeff Layton 已提交
3826 3827
	int status;
	struct nfs4_delegation *dp;
3828

3829
	if (fp->fi_had_conflict)
J
Jeff Layton 已提交
3830 3831 3832 3833 3834 3835
		return ERR_PTR(-EAGAIN);

	dp = alloc_init_deleg(clp, fh);
	if (!dp)
		return ERR_PTR(-ENOMEM);

3836
	get_nfs4_file(fp);
3837 3838
	spin_lock(&state_lock);
	spin_lock(&fp->fi_lock);
3839
	dp->dl_stid.sc_file = fp;
3840 3841 3842
	if (!fp->fi_lease) {
		spin_unlock(&fp->fi_lock);
		spin_unlock(&state_lock);
J
Jeff Layton 已提交
3843 3844
		status = nfs4_setlease(dp);
		goto out;
3845
	}
3846
	atomic_inc(&fp->fi_delegees);
3847
	if (fp->fi_had_conflict) {
3848 3849
		status = -EAGAIN;
		goto out_unlock;
3850
	}
3851
	hash_delegation_locked(dp, fp);
J
Jeff Layton 已提交
3852
	status = 0;
3853 3854
out_unlock:
	spin_unlock(&fp->fi_lock);
3855
	spin_unlock(&state_lock);
J
Jeff Layton 已提交
3856 3857
out:
	if (status) {
3858
		nfs4_put_stid(&dp->dl_stid);
J
Jeff Layton 已提交
3859 3860 3861
		return ERR_PTR(status);
	}
	return dp;
3862 3863
}

3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879
/*
 * Fill in the v4.1 "why no delegation" reply for a client that asked for
 * one: contention for -EAGAIN, otherwise a resource/cancel reason derived
 * from what the client wanted.
 */
static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
{
	open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
	if (status == -EAGAIN)
		open->op_why_no_deleg = WND4_CONTENTION;
	else {
		open->op_why_no_deleg = WND4_RESOURCE;
		switch (open->op_deleg_want) {
		case NFS4_SHARE_WANT_READ_DELEG:
		case NFS4_SHARE_WANT_WRITE_DELEG:
		case NFS4_SHARE_WANT_ANY_DELEG:
			break;
		case NFS4_SHARE_WANT_CANCEL:
			open->op_why_no_deleg = WND4_CANCELLED;
			break;
		case NFS4_SHARE_WANT_NO_DELEG:
			/* shouldn't get here if the client wanted none */
			WARN_ON_ONCE(1);
		}
	}
}

L
Linus Torvalds 已提交
3885 3886
/*
 * Attempt to hand out a delegation.
3887 3888 3889
 *
 * Note we don't support write delegations, and won't until the vfs has
 * proper support for them.
L
Linus Torvalds 已提交
3890 3891
 */
static void
3892 3893
nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open,
			struct nfs4_ol_stateid *stp)
L
Linus Torvalds 已提交
3894 3895
{
	struct nfs4_delegation *dp;
3896 3897
	struct nfs4_openowner *oo = openowner(stp->st_stateowner);
	struct nfs4_client *clp = stp->st_stid.sc_client;
3898
	int cb_up;
3899
	int status = 0;
L
Linus Torvalds 已提交
3900

3901
	cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client);
3902 3903 3904
	open->op_recall = 0;
	switch (open->op_claim_type) {
		case NFS4_OPEN_CLAIM_PREVIOUS:
3905
			if (!cb_up)
3906
				open->op_recall = 1;
3907 3908
			if (open->op_delegate_type != NFS4_OPEN_DELEGATE_READ)
				goto out_no_deleg;
3909 3910
			break;
		case NFS4_OPEN_CLAIM_NULL:
3911
		case NFS4_OPEN_CLAIM_FH:
3912 3913 3914 3915
			/*
			 * Let's not give out any delegations till everyone's
			 * had the chance to reclaim theirs....
			 */
3916
			if (locks_in_grace(clp->net))
3917
				goto out_no_deleg;
3918
			if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED))
3919
				goto out_no_deleg;
3920 3921 3922 3923 3924 3925 3926
			/*
			 * Also, if the file was opened for write or
			 * create, there's a good chance the client's
			 * about to write to it, resulting in an
			 * immediate recall (since we don't support
			 * write delegations):
			 */
3927
			if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
3928 3929 3930
				goto out_no_deleg;
			if (open->op_create == NFS4_OPEN_CREATE)
				goto out_no_deleg;
3931 3932
			break;
		default:
3933
			goto out_no_deleg;
3934
	}
3935
	dp = nfs4_set_delegation(clp, fh, stp->st_stid.sc_file);
J
Jeff Layton 已提交
3936
	if (IS_ERR(dp))
3937
		goto out_no_deleg;
L
Linus Torvalds 已提交
3938

3939
	memcpy(&open->op_delegate_stateid, &dp->dl_stid.sc_stateid, sizeof(dp->dl_stid.sc_stateid));
L
Linus Torvalds 已提交
3940

3941
	dprintk("NFSD: delegation stateid=" STATEID_FMT "\n",
3942
		STATEID_VAL(&dp->dl_stid.sc_stateid));
3943
	open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
3944
	nfs4_put_stid(&dp->dl_stid);
3945 3946
	return;
out_no_deleg:
3947 3948
	open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE;
	if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS &&
3949
	    open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE) {
3950
		dprintk("NFSD: WARNING: refusing delegation reclaim\n");
3951 3952
		open->op_recall = 1;
	}
3953 3954 3955 3956 3957

	/* 4.1 client asking for a delegation? */
	if (open->op_deleg_want)
		nfsd4_open_deleg_none_ext(open, status);
	return;
L
Linus Torvalds 已提交
3958 3959
}

3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977
/*
 * Tell a 4.1 client why it did not get the delegation flavor it asked for:
 * set NONE_EXT plus a why-code only when the requested flavor differs from
 * the delegation it holds (per the trailing comment, a request for the same
 * flavor is left unanswered).
 */
static void nfsd4_deleg_xgrade_none_ext(struct nfsd4_open *open,
					struct nfs4_delegation *dp)
{
	if (open->op_deleg_want == NFS4_SHARE_WANT_READ_DELEG &&
	    dp->dl_type == NFS4_OPEN_DELEGATE_WRITE) {
		open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
		open->op_why_no_deleg = WND4_NOT_SUPP_DOWNGRADE;
	} else if (open->op_deleg_want == NFS4_SHARE_WANT_WRITE_DELEG &&
		   dp->dl_type == NFS4_OPEN_DELEGATE_WRITE) {
		open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
		open->op_why_no_deleg = WND4_NOT_SUPP_UPGRADE;
	}
	/* Otherwise the client must be confused wanting a delegation
	 * it already has, therefore we don't return
	 * NFS4_OPEN_DELEGATE_NONE_EXT and reason.
	 */
}

3978
__be32
L
Linus Torvalds 已提交
3979 3980
nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
{
A
Andy Adamson 已提交
3981
	struct nfsd4_compoundres *resp = rqstp->rq_resp;
3982
	struct nfs4_client *cl = open->op_openowner->oo_owner.so_client;
L
Linus Torvalds 已提交
3983
	struct nfs4_file *fp = NULL;
3984
	struct nfs4_ol_stateid *stp = NULL;
3985
	struct nfs4_delegation *dp = NULL;
3986
	__be32 status;
L
Linus Torvalds 已提交
3987 3988 3989 3990 3991 3992

	/*
	 * Lookup file; if found, lookup stateid and check open request,
	 * and check for delegations in the process of being recalled.
	 * If not found, create the nfs4_file struct
	 */
3993
	fp = find_or_add_file(open->op_file, &current_fh->fh_handle);
3994
	if (fp != open->op_file) {
3995
		status = nfs4_check_deleg(cl, open, &dp);
3996 3997
		if (status)
			goto out;
3998
		stp = nfsd4_find_existing_open(fp, open);
L
Linus Torvalds 已提交
3999
	} else {
4000
		open->op_file = NULL;
4001
		status = nfserr_bad_stateid;
4002
		if (nfsd4_is_deleg_cur(open))
4003
			goto out;
4004
		status = nfserr_jukebox;
L
Linus Torvalds 已提交
4005 4006 4007 4008 4009 4010 4011 4012
	}

	/*
	 * OPEN the file, or upgrade an existing OPEN.
	 * If truncate fails, the OPEN fails.
	 */
	if (stp) {
		/* Stateid was found, this is an OPEN upgrade */
4013
		status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open);
L
Linus Torvalds 已提交
4014 4015 4016
		if (status)
			goto out;
	} else {
4017 4018
		stp = open->op_stp;
		open->op_stp = NULL;
4019
		init_open_stateid(stp, fp, open);
4020 4021 4022 4023 4024
		status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open);
		if (status) {
			release_open_stateid(stp);
			goto out;
		}
L
Linus Torvalds 已提交
4025
	}
4026 4027
	update_stateid(&stp->st_stid.sc_stateid);
	memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
L
Linus Torvalds 已提交
4028

4029 4030 4031 4032 4033 4034 4035 4036
	if (nfsd4_has_session(&resp->cstate)) {
		if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) {
			open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE_EXT;
			open->op_why_no_deleg = WND4_NOT_WANTED;
			goto nodeleg;
		}
	}

L
Linus Torvalds 已提交
4037 4038 4039 4040
	/*
	* Attempt to hand out a delegation. No error return, because the
	* OPEN succeeds even if we fail.
	*/
4041
	nfs4_open_delegation(current_fh, open, stp);
4042
nodeleg:
L
Linus Torvalds 已提交
4043 4044
	status = nfs_ok;

4045
	dprintk("%s: stateid=" STATEID_FMT "\n", __func__,
4046
		STATEID_VAL(&stp->st_stid.sc_stateid));
L
Linus Torvalds 已提交
4047
out:
4048 4049
	/* 4.1 client trying to upgrade/downgrade delegation? */
	if (open->op_delegate_type == NFS4_OPEN_DELEGATE_NONE && dp &&
4050 4051
	    open->op_deleg_want)
		nfsd4_deleg_xgrade_none_ext(open, dp);
4052

4053 4054
	if (fp)
		put_nfs4_file(fp);
4055
	if (status == 0 && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
4056
		nfs4_set_claim_prev(open, nfsd4_has_session(&resp->cstate));
L
Linus Torvalds 已提交
4057 4058 4059 4060
	/*
	* To finish the open response, we just need to set the rflags.
	*/
	open->op_rflags = NFS4_OPEN_RESULT_LOCKTYPE_POSIX;
4061
	if (!(open->op_openowner->oo_flags & NFS4_OO_CONFIRMED) &&
A
Andy Adamson 已提交
4062
	    !nfsd4_has_session(&resp->cstate))
L
Linus Torvalds 已提交
4063
		open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM;
4064 4065
	if (dp)
		nfs4_put_stid(&dp->dl_stid);
4066 4067
	if (stp)
		nfs4_put_stid(&stp->st_stid);
L
Linus Torvalds 已提交
4068 4069 4070 4071

	return status;
}

4072 4073
void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate,
			      struct nfsd4_open *open, __be32 status)
4074 4075
{
	if (open->op_openowner) {
4076 4077 4078 4079
		struct nfs4_stateowner *so = &open->op_openowner->oo_owner;

		nfsd4_cstate_assign_replay(cstate, so);
		nfs4_put_stateowner(so);
4080
	}
4081 4082
	if (open->op_file)
		nfsd4_free_file(open->op_file);
4083
	if (open->op_stp)
4084
		nfs4_put_stid(&open->op_stp->st_stid);
4085 4086
}

4087
__be32
4088 4089
nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
	    clientid_t *clid)
L
Linus Torvalds 已提交
4090 4091
{
	struct nfs4_client *clp;
4092
	__be32 status;
4093
	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
L
Linus Torvalds 已提交
4094 4095 4096

	dprintk("process_renew(%08x/%08x): starting\n", 
			clid->cl_boot, clid->cl_id);
4097
	status = lookup_clientid(clid, cstate, nn);
4098
	if (status)
L
Linus Torvalds 已提交
4099
		goto out;
4100
	clp = cstate->clp;
L
Linus Torvalds 已提交
4101
	status = nfserr_cb_path_down;
4102
	if (!list_empty(&clp->cl_delegations)
4103
			&& clp->cl_cb_state != NFSD4_CB_UP)
L
Linus Torvalds 已提交
4104 4105 4106 4107 4108 4109
		goto out;
	status = nfs_ok;
out:
	return status;
}

4110
static void
4111
nfsd4_end_grace(struct nfsd_net *nn)
4112
{
4113
	/* do nothing if grace period already ended */
4114
	if (nn->grace_ended)
4115 4116
		return;

4117
	dprintk("NFSD: end of grace period\n");
4118
	nn->grace_ended = true;
4119
	nfsd4_record_grace_done(nn, nn->boot_time);
4120
	locks_end_grace(&nn->nfsd4_manager);
4121 4122 4123 4124 4125
	/*
	 * Now that every NFSv4 client has had the chance to recover and
	 * to see the (possibly new, possibly shorter) lease time, we
	 * can safely set the next grace time to the current lease time:
	 */
4126
	nn->nfsd4_grace = nn->nfsd4_lease;
4127 4128
}

4129
static time_t
4130
nfs4_laundromat(struct nfsd_net *nn)
L
Linus Torvalds 已提交
4131 4132
{
	struct nfs4_client *clp;
4133
	struct nfs4_openowner *oo;
L
Linus Torvalds 已提交
4134
	struct nfs4_delegation *dp;
4135
	struct nfs4_ol_stateid *stp;
L
Linus Torvalds 已提交
4136
	struct list_head *pos, *next, reaplist;
4137
	time_t cutoff = get_seconds() - nn->nfsd4_lease;
4138
	time_t t, new_timeo = nn->nfsd4_lease;
L
Linus Torvalds 已提交
4139 4140

	dprintk("NFSD: laundromat service - starting\n");
4141
	nfsd4_end_grace(nn);
4142
	INIT_LIST_HEAD(&reaplist);
4143
	spin_lock(&nn->client_lock);
4144
	list_for_each_safe(pos, next, &nn->client_lru) {
L
Linus Torvalds 已提交
4145 4146 4147
		clp = list_entry(pos, struct nfs4_client, cl_lru);
		if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
			t = clp->cl_time - cutoff;
4148
			new_timeo = min(new_timeo, t);
L
Linus Torvalds 已提交
4149 4150
			break;
		}
4151
		if (mark_client_expired_locked(clp)) {
4152 4153 4154 4155
			dprintk("NFSD: client in use (clientid %08x)\n",
				clp->cl_clientid.cl_id);
			continue;
		}
4156
		list_add(&clp->cl_lru, &reaplist);
4157
	}
4158
	spin_unlock(&nn->client_lock);
4159 4160
	list_for_each_safe(pos, next, &reaplist) {
		clp = list_entry(pos, struct nfs4_client, cl_lru);
L
Linus Torvalds 已提交
4161 4162
		dprintk("NFSD: purging unused client (clientid %08x)\n",
			clp->cl_clientid.cl_id);
4163
		list_del_init(&clp->cl_lru);
L
Linus Torvalds 已提交
4164 4165
		expire_client(clp);
	}
4166
	spin_lock(&state_lock);
4167
	list_for_each_safe(pos, next, &nn->del_recall_lru) {
L
Linus Torvalds 已提交
4168
		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
4169 4170
		if (net_generic(dp->dl_stid.sc_client->net, nfsd_net_id) != nn)
			continue;
L
Linus Torvalds 已提交
4171
		if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) {
4172 4173
			t = dp->dl_time - cutoff;
			new_timeo = min(new_timeo, t);
L
Linus Torvalds 已提交
4174 4175
			break;
		}
4176 4177
		unhash_delegation_locked(dp);
		list_add(&dp->dl_recall_lru, &reaplist);
L
Linus Torvalds 已提交
4178
	}
4179
	spin_unlock(&state_lock);
4180 4181 4182 4183
	while (!list_empty(&reaplist)) {
		dp = list_first_entry(&reaplist, struct nfs4_delegation,
					dl_recall_lru);
		list_del_init(&dp->dl_recall_lru);
4184
		revoke_delegation(dp);
L
Linus Torvalds 已提交
4185
	}
4186 4187 4188 4189 4190 4191 4192

	spin_lock(&nn->client_lock);
	while (!list_empty(&nn->close_lru)) {
		oo = list_first_entry(&nn->close_lru, struct nfs4_openowner,
					oo_close_lru);
		if (time_after((unsigned long)oo->oo_time,
			       (unsigned long)cutoff)) {
4193 4194
			t = oo->oo_time - cutoff;
			new_timeo = min(new_timeo, t);
L
Linus Torvalds 已提交
4195 4196
			break;
		}
4197 4198 4199 4200 4201 4202
		list_del_init(&oo->oo_close_lru);
		stp = oo->oo_last_closed_stid;
		oo->oo_last_closed_stid = NULL;
		spin_unlock(&nn->client_lock);
		nfs4_put_stid(&stp->st_stid);
		spin_lock(&nn->client_lock);
L
Linus Torvalds 已提交
4203
	}
4204 4205
	spin_unlock(&nn->client_lock);

4206 4207
	new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
	return new_timeo;
L
Linus Torvalds 已提交
4208 4209
}

H
Harvey Harrison 已提交
4210 4211 4212 4213
static struct workqueue_struct *laundry_wq;
static void laundromat_main(struct work_struct *);

static void
4214
laundromat_main(struct work_struct *laundry)
L
Linus Torvalds 已提交
4215 4216
{
	time_t t;
4217 4218 4219 4220
	struct delayed_work *dwork = container_of(laundry, struct delayed_work,
						  work);
	struct nfsd_net *nn = container_of(dwork, struct nfsd_net,
					   laundromat_work);
L
Linus Torvalds 已提交
4221

4222
	t = nfs4_laundromat(nn);
L
Linus Torvalds 已提交
4223
	dprintk("NFSD: laundromat_main - sleeping for %ld seconds\n", t);
4224
	queue_delayed_work(laundry_wq, &nn->laundromat_work, t*HZ);
L
Linus Torvalds 已提交
4225 4226
}

4227
static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *stp)
L
Linus Torvalds 已提交
4228
{
4229
	if (!nfsd_fh_match(&fhp->fh_handle, &stp->st_stid.sc_file->fi_fhandle))
4230 4231
		return nfserr_bad_stateid;
	return nfs_ok;
L
Linus Torvalds 已提交
4232 4233 4234
}

static inline int
4235
access_permit_read(struct nfs4_ol_stateid *stp)
L
Linus Torvalds 已提交
4236
{
4237 4238 4239
	return test_access(NFS4_SHARE_ACCESS_READ, stp) ||
		test_access(NFS4_SHARE_ACCESS_BOTH, stp) ||
		test_access(NFS4_SHARE_ACCESS_WRITE, stp);
L
Linus Torvalds 已提交
4240 4241 4242
}

static inline int
4243
access_permit_write(struct nfs4_ol_stateid *stp)
L
Linus Torvalds 已提交
4244
{
4245 4246
	return test_access(NFS4_SHARE_ACCESS_WRITE, stp) ||
		test_access(NFS4_SHARE_ACCESS_BOTH, stp);
L
Linus Torvalds 已提交
4247 4248 4249
}

static
4250
__be32 nfs4_check_openmode(struct nfs4_ol_stateid *stp, int flags)
L
Linus Torvalds 已提交
4251
{
4252
        __be32 status = nfserr_openmode;
L
Linus Torvalds 已提交
4253

4254 4255 4256
	/* For lock stateid's, we test the parent open, not the lock: */
	if (stp->st_openstp)
		stp = stp->st_openstp;
4257
	if ((flags & WR_STATE) && !access_permit_write(stp))
L
Linus Torvalds 已提交
4258
                goto out;
4259
	if ((flags & RD_STATE) && !access_permit_read(stp))
L
Linus Torvalds 已提交
4260 4261 4262 4263 4264 4265
                goto out;
	status = nfs_ok;
out:
	return status;
}

4266
static inline __be32
4267
check_special_stateids(struct net *net, svc_fh *current_fh, stateid_t *stateid, int flags)
L
Linus Torvalds 已提交
4268
{
4269
	if (ONE_STATEID(stateid) && (flags & RD_STATE))
L
Linus Torvalds 已提交
4270
		return nfs_ok;
4271
	else if (locks_in_grace(net)) {
L
Lucas De Marchi 已提交
4272
		/* Answer in remaining cases depends on existence of
L
Linus Torvalds 已提交
4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287
		 * conflicting state; so we must wait out the grace period. */
		return nfserr_grace;
	} else if (flags & WR_STATE)
		return nfs4_share_conflict(current_fh,
				NFS4_SHARE_DENY_WRITE);
	else /* (flags & RD_STATE) && ZERO_STATEID(stateid) */
		return nfs4_share_conflict(current_fh,
				NFS4_SHARE_DENY_READ);
}

/*
 * Allow READ/WRITE during grace period on recovered state only for files
 * that are not able to provide mandatory locking.
 */
static inline int
grace_disallows_io(struct net *net, struct inode *inode)
{
	return locks_in_grace(net) && mandatory_lock(inode);
}

4293 4294 4295
/* Returns true iff a is later than b: */
static bool stateid_generation_after(stateid_t *a, stateid_t *b)
{
J
Jim Rees 已提交
4296
	return (s32)(a->si_generation - b->si_generation) > 0;
4297 4298
}

J
J. Bruce Fields 已提交
4299
static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_session)
4300
{
A
Andy Adamson 已提交
4301 4302 4303 4304
	/*
	 * When sessions are used the stateid generation number is ignored
	 * when it is zero.
	 */
J
J. Bruce Fields 已提交
4305
	if (has_session && in->si_generation == 0)
4306 4307 4308 4309
		return nfs_ok;

	if (in->si_generation == ref->si_generation)
		return nfs_ok;
A
Andy Adamson 已提交
4310

4311
	/* If the client sends us a stateid from the future, it's buggy: */
4312
	if (stateid_generation_after(in, ref))
4313 4314
		return nfserr_bad_stateid;
	/*
4315 4316 4317 4318 4319 4320 4321 4322
	 * However, we could see a stateid from the past, even from a
	 * non-buggy client.  For example, if the client sends a lock
	 * while some IO is outstanding, the lock may bump si_generation
	 * while the IO is still in flight.  The client could avoid that
	 * situation by waiting for responses on all the IO requests,
	 * but better performance may result in retrying IO that
	 * receives an old_stateid error if requests are rarely
	 * reordered in flight:
4323
	 */
4324
	return nfserr_old_stateid;
4325 4326
}

4327
static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
4328
{
4329 4330
	struct nfs4_stid *s;
	struct nfs4_ol_stateid *ols;
4331
	__be32 status = nfserr_bad_stateid;
4332

4333
	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
4334
		return status;
4335 4336 4337 4338 4339 4340 4341
	/* Client debugging aid. */
	if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid)) {
		char addr_str[INET6_ADDRSTRLEN];
		rpc_ntop((struct sockaddr *)&cl->cl_addr, addr_str,
				 sizeof(addr_str));
		pr_warn_ratelimited("NFSD: client %s testing state ID "
					"with incorrect client ID\n", addr_str);
4342
		return status;
4343
	}
4344 4345
	spin_lock(&cl->cl_lock);
	s = find_stateid_locked(cl, stateid);
4346
	if (!s)
4347
		goto out_unlock;
4348
	status = check_stateid_generation(stateid, &s->sc_stateid, 1);
4349
	if (status)
4350
		goto out_unlock;
4351 4352
	switch (s->sc_type) {
	case NFS4_DELEG_STID:
4353 4354
		status = nfs_ok;
		break;
4355
	case NFS4_REVOKED_DELEG_STID:
4356 4357
		status = nfserr_deleg_revoked;
		break;
4358 4359 4360 4361 4362 4363
	case NFS4_OPEN_STID:
	case NFS4_LOCK_STID:
		ols = openlockstateid(s);
		if (ols->st_stateowner->so_is_open_owner
	    			&& !(openowner(ols->st_stateowner)->oo_flags
						& NFS4_OO_CONFIRMED))
4364 4365 4366 4367
			status = nfserr_bad_stateid;
		else
			status = nfs_ok;
		break;
4368 4369
	default:
		printk("unknown stateid type %x\n", s->sc_type);
4370
		/* Fallthrough */
4371
	case NFS4_CLOSED_STID:
4372
	case NFS4_CLOSED_DELEG_STID:
4373
		status = nfserr_bad_stateid;
4374
	}
4375 4376 4377
out_unlock:
	spin_unlock(&cl->cl_lock);
	return status;
4378 4379
}

4380 4381 4382 4383
static __be32
nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
		     stateid_t *stateid, unsigned char typemask,
		     struct nfs4_stid **s, struct nfsd_net *nn)
4384
{
J
J. Bruce Fields 已提交
4385
	__be32 status;
4386 4387 4388

	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
		return nfserr_bad_stateid;
4389
	status = lookup_clientid(&stateid->si_opaque.so_clid, cstate, nn);
4390
	if (status == nfserr_stale_clientid) {
4391
		if (cstate->session)
4392
			return nfserr_bad_stateid;
4393
		return nfserr_stale_stateid;
4394
	}
J
J. Bruce Fields 已提交
4395 4396
	if (status)
		return status;
4397
	*s = find_stateid_by_type(cstate->clp, stateid, typemask);
4398 4399 4400 4401 4402
	if (!*s)
		return nfserr_bad_stateid;
	return nfs_ok;
}

L
Linus Torvalds 已提交
4403 4404 4405
/*
* Checks for stateid operations
*/
4406
__be32
4407
nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
4408
			   stateid_t *stateid, int flags, struct file **filpp)
L
Linus Torvalds 已提交
4409
{
4410
	struct nfs4_stid *s;
4411
	struct nfs4_ol_stateid *stp = NULL;
L
Linus Torvalds 已提交
4412
	struct nfs4_delegation *dp = NULL;
4413
	struct svc_fh *current_fh = &cstate->current_fh;
L
Linus Torvalds 已提交
4414
	struct inode *ino = current_fh->fh_dentry->d_inode;
4415
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
4416
	struct file *file = NULL;
4417
	__be32 status;
L
Linus Torvalds 已提交
4418 4419 4420 4421

	if (filpp)
		*filpp = NULL;

4422
	if (grace_disallows_io(net, ino))
L
Linus Torvalds 已提交
4423 4424 4425
		return nfserr_grace;

	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
4426
		return check_special_stateids(net, current_fh, stateid, flags);
L
Linus Torvalds 已提交
4427

4428
	status = nfsd4_lookup_stateid(cstate, stateid,
4429
				NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,
4430
				&s, nn);
4431
	if (status)
4432
		return status;
4433 4434 4435
	status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate));
	if (status)
		goto out;
4436 4437
	switch (s->sc_type) {
	case NFS4_DELEG_STID:
4438
		dp = delegstateid(s);
4439 4440 4441
		status = nfs4_check_delegmode(dp, flags);
		if (status)
			goto out;
4442
		if (filpp) {
4443
			file = dp->dl_stid.sc_file->fi_deleg_file;
4444
			if (!file) {
4445 4446 4447 4448
				WARN_ON_ONCE(1);
				status = nfserr_serverfault;
				goto out;
			}
4449
			get_file(file);
4450
		}
4451 4452 4453
		break;
	case NFS4_OPEN_STID:
	case NFS4_LOCK_STID:
4454
		stp = openlockstateid(s);
4455 4456
		status = nfs4_check_fh(current_fh, stp);
		if (status)
L
Linus Torvalds 已提交
4457
			goto out;
4458
		if (stp->st_stateowner->so_is_open_owner
4459
		    && !(openowner(stp->st_stateowner)->oo_flags & NFS4_OO_CONFIRMED))
L
Linus Torvalds 已提交
4460
			goto out;
4461 4462
		status = nfs4_check_openmode(stp, flags);
		if (status)
L
Linus Torvalds 已提交
4463
			goto out;
4464
		if (filpp) {
4465 4466
			struct nfs4_file *fp = stp->st_stid.sc_file;

4467
			if (flags & RD_STATE)
4468
				file = find_readable_file(fp);
4469
			else
4470
				file = find_writeable_file(fp);
4471
		}
4472 4473
		break;
	default:
4474 4475
		status = nfserr_bad_stateid;
		goto out;
L
Linus Torvalds 已提交
4476 4477
	}
	status = nfs_ok;
4478
	if (file)
4479
		*filpp = file;
L
Linus Torvalds 已提交
4480
out:
4481
	nfs4_put_stid(s);
L
Linus Torvalds 已提交
4482 4483 4484
	return status;
}

4485 4486 4487 4488 4489 4490 4491
/*
 * Test if the stateid is valid
 */
__be32
nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
		   struct nfsd4_test_stateid *test_stateid)
{
4492 4493 4494 4495
	struct nfsd4_test_stateid_id *stateid;
	struct nfs4_client *cl = cstate->session->se_client;

	list_for_each_entry(stateid, &test_stateid->ts_stateid_list, ts_id_list)
4496 4497
		stateid->ts_id_status =
			nfsd4_validate_stateid(cl, &stateid->ts_id_stateid);
4498

4499 4500 4501
	return nfs_ok;
}

4502 4503 4504 4505 4506
__be32
nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
		   struct nfsd4_free_stateid *free_stateid)
{
	stateid_t *stateid = &free_stateid->fr_stateid;
J
J. Bruce Fields 已提交
4507
	struct nfs4_stid *s;
4508
	struct nfs4_delegation *dp;
4509
	struct nfs4_ol_stateid *stp;
4510
	struct nfs4_client *cl = cstate->session->se_client;
J
J. Bruce Fields 已提交
4511
	__be32 ret = nfserr_bad_stateid;
4512

4513 4514
	spin_lock(&cl->cl_lock);
	s = find_stateid_locked(cl, stateid);
J
J. Bruce Fields 已提交
4515
	if (!s)
4516
		goto out_unlock;
J
J. Bruce Fields 已提交
4517 4518
	switch (s->sc_type) {
	case NFS4_DELEG_STID:
4519
		ret = nfserr_locks_held;
4520
		break;
J
J. Bruce Fields 已提交
4521 4522 4523
	case NFS4_OPEN_STID:
		ret = check_stateid_generation(stateid, &s->sc_stateid, 1);
		if (ret)
4524 4525
			break;
		ret = nfserr_locks_held;
4526
		break;
4527 4528 4529 4530
	case NFS4_LOCK_STID:
		ret = check_stateid_generation(stateid, &s->sc_stateid, 1);
		if (ret)
			break;
4531 4532 4533 4534 4535 4536
		stp = openlockstateid(s);
		ret = nfserr_locks_held;
		if (check_for_locks(stp->st_stid.sc_file,
				    lockowner(stp->st_stateowner)))
			break;
		unhash_lock_stateid(stp);
4537
		spin_unlock(&cl->cl_lock);
4538 4539
		nfs4_put_stid(s);
		ret = nfs_ok;
4540
		goto out;
4541 4542
	case NFS4_REVOKED_DELEG_STID:
		dp = delegstateid(s);
4543 4544
		list_del_init(&dp->dl_recall_lru);
		spin_unlock(&cl->cl_lock);
4545
		nfs4_put_stid(s);
4546
		ret = nfs_ok;
4547 4548
		goto out;
	/* Default falls through and returns nfserr_bad_stateid */
4549
	}
4550 4551
out_unlock:
	spin_unlock(&cl->cl_lock);
4552 4553 4554 4555
out:
	return ret;
}

4556 4557 4558 4559 4560 4561
static inline int
setlkflg (int type)
{
	return (type == NFS4_READW_LT || type == NFS4_READ_LT) ?
		RD_STATE : WR_STATE;
}
L
Linus Torvalds 已提交
4562

4563
static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_t *stateid, u32 seqid, struct nfs4_ol_stateid *stp)
4564 4565 4566 4567 4568 4569 4570 4571
{
	struct svc_fh *current_fh = &cstate->current_fh;
	struct nfs4_stateowner *sop = stp->st_stateowner;
	__be32 status;

	status = nfsd4_check_seqid(cstate, sop, seqid);
	if (status)
		return status;
4572 4573
	if (stp->st_stid.sc_type == NFS4_CLOSED_STID
		|| stp->st_stid.sc_type == NFS4_REVOKED_DELEG_STID)
4574 4575
		/*
		 * "Closed" stateid's exist *only* to return
4576 4577
		 * nfserr_replay_me from the previous step, and
		 * revoked delegations are kept only for free_stateid.
4578 4579 4580 4581 4582 4583
		 */
		return nfserr_bad_stateid;
	status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate));
	if (status)
		return status;
	return nfs4_check_fh(current_fh, stp);
4584 4585
}

L
Linus Torvalds 已提交
4586 4587 4588
/* 
 * Checks for sequence id mutating operations. 
 */
4589
static __be32
4590
nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
4591
			 stateid_t *stateid, char typemask,
4592 4593
			 struct nfs4_ol_stateid **stpp,
			 struct nfsd_net *nn)
L
Linus Torvalds 已提交
4594
{
4595
	__be32 status;
4596
	struct nfs4_stid *s;
4597
	struct nfs4_ol_stateid *stp = NULL;
L
Linus Torvalds 已提交
4598

4599 4600
	dprintk("NFSD: %s: seqid=%d stateid = " STATEID_FMT "\n", __func__,
		seqid, STATEID_VAL(stateid));
4601

L
Linus Torvalds 已提交
4602
	*stpp = NULL;
4603
	status = nfsd4_lookup_stateid(cstate, stateid, typemask, &s, nn);
4604 4605
	if (status)
		return status;
4606
	stp = openlockstateid(s);
4607
	nfsd4_cstate_assign_replay(cstate, stp->st_stateowner);
L
Linus Torvalds 已提交
4608

4609
	status = nfs4_seqid_op_checks(cstate, stateid, seqid, stp);
4610
	if (!status)
4611
		*stpp = stp;
4612 4613
	else
		nfs4_put_stid(&stp->st_stid);
4614
	return status;
4615
}
4616

4617 4618
static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
						 stateid_t *stateid, struct nfs4_ol_stateid **stpp, struct nfsd_net *nn)
4619 4620 4621
{
	__be32 status;
	struct nfs4_openowner *oo;
4622
	struct nfs4_ol_stateid *stp;
L
Linus Torvalds 已提交
4623

4624
	status = nfs4_preprocess_seqid_op(cstate, seqid, stateid,
4625
						NFS4_OPEN_STID, &stp, nn);
4626 4627
	if (status)
		return status;
4628 4629 4630
	oo = openowner(stp->st_stateowner);
	if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) {
		nfs4_put_stid(&stp->st_stid);
4631
		return nfserr_bad_stateid;
4632 4633
	}
	*stpp = stp;
4634
	return nfs_ok;
L
Linus Torvalds 已提交
4635 4636
}

4637
__be32
4638
nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4639
		   struct nfsd4_open_confirm *oc)
L
Linus Torvalds 已提交
4640
{
4641
	__be32 status;
4642
	struct nfs4_openowner *oo;
4643
	struct nfs4_ol_stateid *stp;
4644
	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
L
Linus Torvalds 已提交
4645

A
Al Viro 已提交
4646 4647
	dprintk("NFSD: nfsd4_open_confirm on file %pd\n",
			cstate->current_fh.fh_dentry);
L
Linus Torvalds 已提交
4648

4649
	status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0);
4650 4651
	if (status)
		return status;
L
Linus Torvalds 已提交
4652

4653
	status = nfs4_preprocess_seqid_op(cstate,
4654
					oc->oc_seqid, &oc->oc_req_stateid,
4655
					NFS4_OPEN_STID, &stp, nn);
4656
	if (status)
4657
		goto out;
4658
	oo = openowner(stp->st_stateowner);
4659
	status = nfserr_bad_stateid;
4660
	if (oo->oo_flags & NFS4_OO_CONFIRMED)
4661
		goto put_stateid;
4662
	oo->oo_flags |= NFS4_OO_CONFIRMED;
4663 4664
	update_stateid(&stp->st_stid.sc_stateid);
	memcpy(&oc->oc_resp_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
4665
	dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n",
4666
		__func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid));
4667

4668
	nfsd4_client_record_create(oo->oo_owner.so_client);
4669
	status = nfs_ok;
4670 4671
put_stateid:
	nfs4_put_stid(&stp->st_stid);
L
Linus Torvalds 已提交
4672
out:
4673
	nfsd4_bump_seqid(cstate, status);
L
Linus Torvalds 已提交
4674 4675 4676
	return status;
}

J
J. Bruce Fields 已提交
4677
static inline void nfs4_stateid_downgrade_bit(struct nfs4_ol_stateid *stp, u32 access)
L
Linus Torvalds 已提交
4678
{
4679
	if (!test_access(access, stp))
J
J. Bruce Fields 已提交
4680
		return;
4681
	nfs4_file_put_access(stp->st_stid.sc_file, access);
4682
	clear_access(access, stp);
J
J. Bruce Fields 已提交
4683
}
4684

J
J. Bruce Fields 已提交
4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698
static inline void nfs4_stateid_downgrade(struct nfs4_ol_stateid *stp, u32 to_access)
{
	switch (to_access) {
	case NFS4_SHARE_ACCESS_READ:
		nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_WRITE);
		nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_BOTH);
		break;
	case NFS4_SHARE_ACCESS_WRITE:
		nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_READ);
		nfs4_stateid_downgrade_bit(stp, NFS4_SHARE_ACCESS_BOTH);
		break;
	case NFS4_SHARE_ACCESS_BOTH:
		break;
	default:
4699
		WARN_ON_ONCE(1);
L
Linus Torvalds 已提交
4700 4701 4702
	}
}

4703
__be32
4704 4705
nfsd4_open_downgrade(struct svc_rqst *rqstp,
		     struct nfsd4_compound_state *cstate,
4706
		     struct nfsd4_open_downgrade *od)
L
Linus Torvalds 已提交
4707
{
4708
	__be32 status;
4709
	struct nfs4_ol_stateid *stp;
4710
	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
L
Linus Torvalds 已提交
4711

A
Al Viro 已提交
4712 4713
	dprintk("NFSD: nfsd4_open_downgrade on file %pd\n", 
			cstate->current_fh.fh_dentry);
L
Linus Torvalds 已提交
4714

4715
	/* We don't yet support WANT bits: */
4716 4717 4718
	if (od->od_deleg_want)
		dprintk("NFSD: %s: od_deleg_want=0x%x ignored\n", __func__,
			od->od_deleg_want);
L
Linus Torvalds 已提交
4719

4720
	status = nfs4_preprocess_confirmed_seqid_op(cstate, od->od_seqid,
4721
					&od->od_stateid, &stp, nn);
4722
	if (status)
L
Linus Torvalds 已提交
4723 4724
		goto out; 
	status = nfserr_inval;
4725
	if (!test_access(od->od_share_access, stp)) {
4726
		dprintk("NFSD: access not a subset of current bitmap: 0x%hhx, input access=%08x\n",
L
Linus Torvalds 已提交
4727
			stp->st_access_bmap, od->od_share_access);
4728
		goto put_stateid;
L
Linus Torvalds 已提交
4729
	}
4730
	if (!test_deny(od->od_share_deny, stp)) {
4731
		dprintk("NFSD: deny not a subset of current bitmap: 0x%hhx, input deny=%08x\n",
L
Linus Torvalds 已提交
4732
			stp->st_deny_bmap, od->od_share_deny);
4733
		goto put_stateid;
L
Linus Torvalds 已提交
4734
	}
J
J. Bruce Fields 已提交
4735
	nfs4_stateid_downgrade(stp, od->od_share_access);
L
Linus Torvalds 已提交
4736

4737
	reset_union_bmap_deny(od->od_share_deny, stp);
L
Linus Torvalds 已提交
4738

4739 4740
	update_stateid(&stp->st_stid.sc_stateid);
	memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
L
Linus Torvalds 已提交
4741
	status = nfs_ok;
4742 4743
put_stateid:
	nfs4_put_stid(&stp->st_stid);
L
Linus Torvalds 已提交
4744
out:
4745
	nfsd4_bump_seqid(cstate, status);
L
Linus Torvalds 已提交
4746 4747 4748
	return status;
}

4749 4750
static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
{
4751
	struct nfs4_client *clp = s->st_stid.sc_client;
4752
	LIST_HEAD(reaplist);
4753

4754
	s->st_stid.sc_type = NFS4_CLOSED_STID;
4755
	spin_lock(&clp->cl_lock);
4756
	unhash_open_stateid(s, &reaplist);
4757

4758 4759 4760 4761 4762 4763 4764
	if (clp->cl_minorversion) {
		put_ol_stateid_locked(s, &reaplist);
		spin_unlock(&clp->cl_lock);
		free_ol_stateid_reaplist(&reaplist);
	} else {
		spin_unlock(&clp->cl_lock);
		free_ol_stateid_reaplist(&reaplist);
4765
		move_to_close_lru(s, clp->net);
4766
	}
4767 4768
}

L
Linus Torvalds 已提交
4769 4770 4771
/*
 * nfs4_unlock_state() called after encode
 */
4772
__be32
4773
nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4774
	    struct nfsd4_close *close)
L
Linus Torvalds 已提交
4775
{
4776
	__be32 status;
4777
	struct nfs4_ol_stateid *stp;
4778 4779
	struct net *net = SVC_NET(rqstp);
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
L
Linus Torvalds 已提交
4780

A
Al Viro 已提交
4781 4782
	dprintk("NFSD: nfsd4_close on file %pd\n", 
			cstate->current_fh.fh_dentry);
L
Linus Torvalds 已提交
4783

4784 4785 4786
	status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid,
					&close->cl_stateid,
					NFS4_OPEN_STID|NFS4_CLOSED_STID,
4787
					&stp, nn);
4788
	nfsd4_bump_seqid(cstate, status);
4789
	if (status)
L
Linus Torvalds 已提交
4790
		goto out; 
4791 4792
	update_stateid(&stp->st_stid.sc_stateid);
	memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
L
Linus Torvalds 已提交
4793

4794
	nfsd4_close_open_stateid(stp);
4795 4796 4797

	/* put reference from nfs4_preprocess_seqid_op */
	nfs4_put_stid(&stp->st_stid);
L
Linus Torvalds 已提交
4798 4799 4800 4801
out:
	return status;
}

4802
__be32
4803 4804
nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
		  struct nfsd4_delegreturn *dr)
L
Linus Torvalds 已提交
4805
{
4806 4807
	struct nfs4_delegation *dp;
	stateid_t *stateid = &dr->dr_stateid;
4808
	struct nfs4_stid *s;
4809
	__be32 status;
4810
	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
L
Linus Torvalds 已提交
4811

4812
	if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0)))
4813
		return status;
L
Linus Torvalds 已提交
4814

4815
	status = nfsd4_lookup_stateid(cstate, stateid, NFS4_DELEG_STID, &s, nn);
4816
	if (status)
4817
		goto out;
4818
	dp = delegstateid(s);
4819
	status = check_stateid_generation(stateid, &dp->dl_stid.sc_stateid, nfsd4_has_session(cstate));
4820
	if (status)
4821
		goto put_stateid;
4822

4823
	destroy_delegation(dp);
4824 4825
put_stateid:
	nfs4_put_stid(&dp->dl_stid);
L
Linus Torvalds 已提交
4826 4827 4828 4829 4830 4831 4832
out:
	return status;
}


#define LOFF_OVERFLOW(start, len)      ((u64)(len) > ~(u64)(start))

/* One past the last byte of the range, clamped to NFS4_MAX_UINT64. */
static inline u64
end_offset(u64 start, u64 len)
{
	u64 end;

	end = start + len;
	return end >= start ? end: NFS4_MAX_UINT64;
}

/* last octet in a range */
static inline u64
last_byte_offset(u64 start, u64 len)
{
	u64 end;

	WARN_ON_ONCE(!len);
	end = start + len;
	return end > start ? end - 1: NFS4_MAX_UINT64;
}

L
Linus Torvalds 已提交
4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869
/*
 * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that
 * we can't properly handle lock requests that go beyond the (2^63 - 1)-th
 * byte, because of sign extension problems.  Since NFSv4 calls for 64-bit
 * locking, this prevents us from being completely protocol-compliant.  The
 * real solution to this problem is to start using unsigned file offsets in
 * the VFS, but this is a very deep change!
 */
static inline void
nfs4_transform_lock_offset(struct file_lock *lock)
{
	/* A negative value here means the u64 wire offset overflowed the
	 * signed loff_t; clamp both ends to the VFS maximum. */
	if (lock->fl_start < 0)
		lock->fl_start = OFFSET_MAX;
	if (lock->fl_end < 0)
		lock->fl_end = OFFSET_MAX;
}

4870 4871
/* Hack!: For now, we're defining this just so we can use a pointer to it
 * as a unique cookie to identify our (NFSv4's) posix locks. */
/* Deliberately empty: only the address of this table is significant. */
static const struct lock_manager_operations nfsd_posix_mng_ops  = {
};
L
Linus Torvalds 已提交
4874 4875 4876 4877

/*
 * Fill in a LOCK/LOCKT "denied" result from the conflicting file_lock.
 * If the conflicting lock is one of ours (identified by fl_lmops), report
 * the real lockowner and clientid; otherwise report an anonymous owner.
 */
static inline void
nfs4_set_lock_denied(struct file_lock *fl, struct nfsd4_lock_denied *deny)
{
	struct nfs4_lockowner *lo;

	if (fl->fl_lmops == &nfsd_posix_mng_ops) {
		lo = (struct nfs4_lockowner *) fl->fl_owner;
		deny->ld_owner.data = kmemdup(lo->lo_owner.so_owner.data,
					lo->lo_owner.so_owner.len, GFP_KERNEL);
		if (!deny->ld_owner.data)
			/* We just don't care that much */
			goto nevermind;
		deny->ld_owner.len = lo->lo_owner.so_owner.len;
		deny->ld_clientid = lo->lo_owner.so_client->cl_clientid;
	} else {
nevermind:
		deny->ld_owner.len = 0;
		deny->ld_owner.data = NULL;
		deny->ld_clientid.cl_boot = 0;
		deny->ld_clientid.cl_id = 0;
	}
	deny->ld_start = fl->fl_start;
	/* NFS4_MAX_UINT64 on the wire means "to end of file" */
	deny->ld_length = NFS4_MAX_UINT64;
	if (fl->fl_end != NFS4_MAX_UINT64)
		deny->ld_length = fl->fl_end - fl->fl_start + 1;
	deny->ld_type = NFS4_READ_LT;
	if (fl->fl_type != F_RDLCK)
		deny->ld_type = NFS4_WRITE_LT;
}

4905
/*
 * Look up a lockowner by owner string within a client's owner hash table.
 * Caller must hold clp->cl_lock.  On success, takes a reference on the
 * stateowner (so_count) and returns it; returns NULL if not found.
 */
static struct nfs4_lockowner *
find_lockowner_str_locked(clientid_t *clid, struct xdr_netobj *owner,
		struct nfs4_client *clp)
{
	unsigned int strhashval = ownerstr_hashval(owner);
	struct nfs4_stateowner *so;

	lockdep_assert_held(&clp->cl_lock);

	list_for_each_entry(so, &clp->cl_ownerstr_hashtbl[strhashval],
			    so_strhash) {
		/* the table holds both open- and lockowners; skip the former */
		if (so->so_is_open_owner)
			continue;
		if (!same_owner_str(so, owner))
			continue;
		atomic_inc(&so->so_count);
		return lockowner(so);
	}
	return NULL;
}

4926 4927
/*
 * Locking wrapper around find_lockowner_str_locked(): takes and drops
 * clp->cl_lock.  Returns a referenced lockowner or NULL.
 */
static struct nfs4_lockowner *
find_lockowner_str(clientid_t *clid, struct xdr_netobj *owner,
		struct nfs4_client *clp)
{
	struct nfs4_lockowner *lo;

	spin_lock(&clp->cl_lock);
	lo = find_lockowner_str_locked(clid, owner, clp);
	spin_unlock(&clp->cl_lock);
	return lo;
}

4938 4939
/* ->so_unhash callback for lockowners (see lockowner_ops). */
static void nfs4_unhash_lockowner(struct nfs4_stateowner *sop)
{
	unhash_lockowner_locked(lockowner(sop));
}

4943 4944 4945 4946 4947 4948 4949 4950
/* ->so_free callback for lockowners: return the object to its slab cache. */
static void nfs4_free_lockowner(struct nfs4_stateowner *sop)
{
	struct nfs4_lockowner *lo = lockowner(sop);

	kmem_cache_free(lockowner_slab, lo);
}

/* Stateowner operations shared by all lockowners. */
static const struct nfs4_stateowner_operations lockowner_ops = {
	.so_unhash =	nfs4_unhash_lockowner,
	.so_free =	nfs4_free_lockowner,
};

L
Linus Torvalds 已提交
4955 4956 4957
/*
 * Alloc a lock owner structure.
 * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has 
L
Lucas De Marchi 已提交
4958
 * occurred. 
L
Linus Torvalds 已提交
4959
 *
4960
 * strhashval = ownerstr_hashval
L
Linus Torvalds 已提交
4961
 */
4962
static struct nfs4_lockowner *
4963 4964 4965 4966 4967
alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp,
			   struct nfs4_ol_stateid *open_stp,
			   struct nfsd4_lock *lock)
{
	struct nfs4_lockowner *lo, *ret;
L
Linus Torvalds 已提交
4968

4969 4970
	lo = alloc_stateowner(lockowner_slab, &lock->lk_new_owner, clp);
	if (!lo)
L
Linus Torvalds 已提交
4971
		return NULL;
4972 4973
	INIT_LIST_HEAD(&lo->lo_owner.so_stateids);
	lo->lo_owner.so_is_open_owner = 0;
4974
	lo->lo_owner.so_seqid = lock->lk_new_lock_seqid;
4975
	lo->lo_owner.so_ops = &lockowner_ops;
4976
	spin_lock(&clp->cl_lock);
4977
	ret = find_lockowner_str_locked(&clp->cl_clientid,
4978
			&lock->lk_new_owner, clp);
4979 4980
	if (ret == NULL) {
		list_add(&lo->lo_owner.so_strhash,
4981
			 &clp->cl_ownerstr_hashtbl[strhashval]);
4982 4983 4984
		ret = lo;
	} else
		nfs4_free_lockowner(&lo->lo_owner);
4985
	spin_unlock(&clp->cl_lock);
4986
	return lo;
L
Linus Torvalds 已提交
4987 4988
}

4989 4990 4991 4992
/*
 * Initialize a freshly allocated lock stateid and link it onto the file,
 * the lockowner, and the parent open stateid.  Caller must hold
 * clp->cl_lock; fp->fi_lock is taken here for the per-file list.
 * Takes references on the stateid, the lockowner, and the file.
 */
static void
init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo,
		  struct nfs4_file *fp, struct inode *inode,
		  struct nfs4_ol_stateid *open_stp)
{
	struct nfs4_client *clp = lo->lo_owner.so_client;

	lockdep_assert_held(&clp->cl_lock);

	atomic_inc(&stp->st_stid.sc_count);
	stp->st_stid.sc_type = NFS4_LOCK_STID;
	stp->st_stateowner = &lo->lo_owner;
	atomic_inc(&lo->lo_owner.so_count);
	get_nfs4_file(fp);
	stp->st_stid.sc_file = fp;
	stp->st_stid.sc_free = nfs4_free_lock_stateid;
	stp->st_access_bmap = 0;
	/* lock stateids inherit the deny mode of their open stateid */
	stp->st_deny_bmap = open_stp->st_deny_bmap;
	stp->st_openstp = open_stp;
	list_add(&stp->st_locks, &open_stp->st_locks);
	list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids);
	spin_lock(&fp->fi_lock);
	list_add(&stp->st_perfile, &fp->fi_stateids);
	spin_unlock(&fp->fi_lock);
}

5015 5016 5017 5018
/*
 * Find an existing lock stateid for (lockowner, file).  Caller must hold
 * clp->cl_lock.  On success, takes a reference (sc_count) and returns the
 * stateid; returns NULL if none exists.
 */
static struct nfs4_ol_stateid *
find_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fp)
{
	struct nfs4_ol_stateid *lst;
	struct nfs4_client *clp = lo->lo_owner.so_client;

	lockdep_assert_held(&clp->cl_lock);

	list_for_each_entry(lst, &lo->lo_owner.so_stateids, st_perstateowner) {
		if (lst->st_stid.sc_file == fp) {
			atomic_inc(&lst->st_stid.sc_count);
			return lst;
		}
	}
	return NULL;
}

5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063
/*
 * Return a referenced lock stateid for (lockowner, file), creating one if
 * needed.  The allocation is done with cl_lock dropped, then the lookup is
 * retried under the lock to handle a racing creator; a losing allocation
 * is released afterwards.  *new is set to true only when a stateid was
 * actually created here.  Returns NULL on allocation failure.
 */
static struct nfs4_ol_stateid *
find_or_create_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fi,
			    struct inode *inode, struct nfs4_ol_stateid *ost,
			    bool *new)
{
	struct nfs4_stid *ns = NULL;
	struct nfs4_ol_stateid *lst;
	struct nfs4_openowner *oo = openowner(ost->st_stateowner);
	struct nfs4_client *clp = oo->oo_owner.so_client;

	spin_lock(&clp->cl_lock);
	lst = find_lock_stateid(lo, fi);
	if (lst == NULL) {
		spin_unlock(&clp->cl_lock);
		ns = nfs4_alloc_stid(clp, stateid_slab);
		if (ns == NULL)
			return NULL;

		/* re-check under the lock: someone may have raced us */
		spin_lock(&clp->cl_lock);
		lst = find_lock_stateid(lo, fi);
		if (likely(!lst)) {
			lst = openlockstateid(ns);
			init_lock_stateid(lst, lo, fi, inode, ost);
			ns = NULL;	/* ownership transferred to lst */
			*new = true;
		}
	}
	spin_unlock(&clp->cl_lock);
	if (ns)
		nfs4_put_stid(ns);
	return lst;
}
5064

5065
static int
L
Linus Torvalds 已提交
5066 5067
check_lock_length(u64 offset, u64 length)
{
B
Benny Halevy 已提交
5068
	return ((length == 0)  || ((length != NFS4_MAX_UINT64) &&
L
Linus Torvalds 已提交
5069 5070 5071
	     LOFF_OVERFLOW(offset, length)));
}

5072
/*
 * Record that this lock stateid holds the given access mode on its file,
 * taking a file access reference the first time.  Caller must hold
 * fp->fi_lock; the access bitmap makes repeated calls idempotent.
 */
static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access)
{
	struct nfs4_file *fp = lock_stp->st_stid.sc_file;

	lockdep_assert_held(&fp->fi_lock);

	if (test_access(access, lock_stp))
		return;
	__nfs4_file_get_access(fp, access);
	set_access(access, lock_stp);
}

5084 5085 5086 5087 5088
/*
 * Find or create the lockowner and lock stateid for a new-owner LOCK
 * request.  On success, *lst holds a referenced lock stateid and *new is
 * set if it was newly created.  The lockowner reference taken here is
 * always dropped before returning.
 */
static __be32
lookup_or_create_lock_state(struct nfsd4_compound_state *cstate,
			    struct nfs4_ol_stateid *ost,
			    struct nfsd4_lock *lock,
			    struct nfs4_ol_stateid **lst, bool *new)
{
	__be32 status;
	struct nfs4_file *fi = ost->st_stid.sc_file;
	struct nfs4_openowner *oo = openowner(ost->st_stateowner);
	struct nfs4_client *cl = oo->oo_owner.so_client;
	struct inode *inode = cstate->current_fh.fh_dentry->d_inode;
	struct nfs4_lockowner *lo;
	unsigned int strhashval;

	lo = find_lockowner_str(&cl->cl_clientid, &lock->v.new.owner, cl);
	if (!lo) {
		strhashval = ownerstr_hashval(&lock->v.new.owner);
		lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock);
		if (lo == NULL)
			return nfserr_jukebox;
	} else {
		/* with an existing lockowner, seqids must be the same */
		status = nfserr_bad_seqid;
		if (!cstate->minorversion &&
		    lock->lk_new_lock_seqid != lo->lo_owner.so_seqid)
			goto out;
	}

	*lst = find_or_create_lock_stateid(lo, fi, inode, ost, new);
	if (*lst == NULL) {
		status = nfserr_jukebox;
		goto out;
	}
	status = nfs_ok;
out:
	nfs4_put_stateowner(&lo->lo_owner);
	return status;
}

L
Linus Torvalds 已提交
5123 5124 5125
/*
 *  LOCK operation
 *
 * Processes an NFSv4 LOCK request.  For a new lockowner (lk_is_new) the
 * open stateid/seqid are validated and a lock stateid is found or created;
 * otherwise the existing lock stateid is looked up by seqid.  The lock is
 * then handed to the VFS via vfs_lock_file(); a conflicting lock is
 * reported back through lk_denied.
 */
__be32
nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
	   struct nfsd4_lock *lock)
{
	struct nfs4_openowner *open_sop = NULL;
	struct nfs4_lockowner *lock_sop = NULL;
	struct nfs4_ol_stateid *lock_stp = NULL;
	struct nfs4_ol_stateid *open_stp = NULL;
	struct nfs4_file *fp;
	struct file *filp = NULL;
	struct file_lock *file_lock = NULL;
	struct file_lock *conflock = NULL;
	__be32 status = 0;
	int lkflg;
	int err;
	bool new = false;
	struct net *net = SVC_NET(rqstp);
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);

	dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n",
		(long long) lock->lk_offset,
		(long long) lock->lk_length);

	if (check_lock_length(lock->lk_offset, lock->lk_length))
		 return nfserr_inval;

	if ((status = fh_verify(rqstp, &cstate->current_fh,
				S_IFREG, NFSD_MAY_LOCK))) {
		dprintk("NFSD: nfsd4_lock: permission denied!\n");
		return status;
	}

	if (lock->lk_is_new) {
		if (nfsd4_has_session(cstate))
			/* See rfc 5661 18.10.3: given clientid is ignored: */
			memcpy(&lock->v.new.clientid,
				&cstate->session->se_client->cl_clientid,
				sizeof(clientid_t));

		status = nfserr_stale_clientid;
		if (STALE_CLIENTID(&lock->lk_new_clientid, nn))
			goto out;

		/* validate and update open stateid and open seqid */
		status = nfs4_preprocess_confirmed_seqid_op(cstate,
				        lock->lk_new_open_seqid,
		                        &lock->lk_new_open_stateid,
					&open_stp, nn);
		if (status)
			goto out;
		open_sop = openowner(open_stp->st_stateowner);
		status = nfserr_bad_stateid;
		if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid,
						&lock->v.new.clientid))
			goto out;
		status = lookup_or_create_lock_state(cstate, open_stp, lock,
							&lock_stp, &new);
	} else {
		status = nfs4_preprocess_seqid_op(cstate,
				       lock->lk_old_lock_seqid,
				       &lock->lk_old_lock_stateid,
				       NFS4_LOCK_STID, &lock_stp, nn);
	}
	if (status)
		goto out;
	lock_sop = lockowner(lock_stp->st_stateowner);

	lkflg = setlkflg(lock->lk_type);
	status = nfs4_check_openmode(lock_stp, lkflg);
	if (status)
		goto out;

	/* reclaims are only valid during, non-reclaims outside of, grace */
	status = nfserr_grace;
	if (locks_in_grace(net) && !lock->lk_reclaim)
		goto out;
	status = nfserr_no_grace;
	if (!locks_in_grace(net) && lock->lk_reclaim)
		goto out;

	file_lock = locks_alloc_lock();
	if (!file_lock) {
		dprintk("NFSD: %s: unable to allocate lock!\n", __func__);
		status = nfserr_jukebox;
		goto out;
	}

	fp = lock_stp->st_stid.sc_file;
	locks_init_lock(file_lock);
	switch (lock->lk_type) {
		case NFS4_READ_LT:
		case NFS4_READW_LT:
			spin_lock(&fp->fi_lock);
			filp = find_readable_file_locked(fp);
			if (filp)
				get_lock_access(lock_stp, NFS4_SHARE_ACCESS_READ);
			spin_unlock(&fp->fi_lock);
			file_lock->fl_type = F_RDLCK;
			break;
		case NFS4_WRITE_LT:
		case NFS4_WRITEW_LT:
			spin_lock(&fp->fi_lock);
			filp = find_writeable_file_locked(fp);
			if (filp)
				get_lock_access(lock_stp, NFS4_SHARE_ACCESS_WRITE);
			spin_unlock(&fp->fi_lock);
			file_lock->fl_type = F_WRLCK;
			break;
		default:
			status = nfserr_inval;
		goto out;
	}
	if (!filp) {
		/* the open stateid doesn't grant the needed access mode */
		status = nfserr_openmode;
		goto out;
	}
	file_lock->fl_owner = (fl_owner_t)lock_sop;
	file_lock->fl_pid = current->tgid;
	file_lock->fl_file = filp;
	file_lock->fl_flags = FL_POSIX;
	file_lock->fl_lmops = &nfsd_posix_mng_ops;
	file_lock->fl_start = lock->lk_offset;
	file_lock->fl_end = last_byte_offset(lock->lk_offset, lock->lk_length);
	nfs4_transform_lock_offset(file_lock);

	conflock = locks_alloc_lock();
	if (!conflock) {
		dprintk("NFSD: %s: unable to allocate lock!\n", __func__);
		status = nfserr_jukebox;
		goto out;
	}

	err = vfs_lock_file(filp, F_SETLK, file_lock, conflock);
	switch (-err) {
	case 0: /* success! */
		update_stateid(&lock_stp->st_stid.sc_stateid);
		memcpy(&lock->lk_resp_stateid, &lock_stp->st_stid.sc_stateid,
				sizeof(stateid_t));
		status = 0;
		break;
	case (EAGAIN):		/* conflock holds conflicting lock */
		status = nfserr_denied;
		dprintk("NFSD: nfsd4_lock: conflicting lock found!\n");
		nfs4_set_lock_denied(conflock, &lock->lk_denied);
		break;
	case (EDEADLK):
		status = nfserr_deadlock;
		break;
	default:
		dprintk("NFSD: nfsd4_lock: vfs_lock_file() failed! status %d\n",err);
		status = nfserrno(err);
		break;
	}
out:
	if (filp)
		fput(filp);
	if (lock_stp) {
		/* Bump seqid manually if the 4.0 replay owner is openowner */
		if (cstate->replay_owner &&
		    cstate->replay_owner != &lock_sop->lo_owner &&
		    seqid_mutating_err(ntohl(status)))
			lock_sop->lo_owner.so_seqid++;

		/*
		 * If this is a new, never-before-used stateid, and we are
		 * returning an error, then just go ahead and release it.
		 */
		if (status && new)
			release_lock_stateid(lock_stp);

		nfs4_put_stid(&lock_stp->st_stid);
	}
	if (open_stp)
		nfs4_put_stid(&open_stp->st_stid);
	nfsd4_bump_seqid(cstate, status);
	if (file_lock)
		locks_free_lock(file_lock);
	if (conflock)
		locks_free_lock(conflock);
	return status;
}

5307 5308 5309 5310 5311 5312
/*
 * The NFSv4 spec allows a client to do a LOCKT without holding an OPEN,
 * so we do a temporary open here just to get an open file to pass to
 * vfs_test_lock.  (Arguably perhaps test_lock should be done with an
 * inode operation.)
 */
static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock)
{
	struct file *file;
	__be32 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
	if (!err) {
		err = nfserrno(vfs_test_lock(file, lock));
		nfsd_close(file);
	}
	return err;
}

L
Linus Torvalds 已提交
5324 5325 5326
/*
 * LOCKT operation
 *
 * Tests whether the described lock could be acquired, without acquiring
 * it.  On conflict, returns nfserr_denied with the conflicting lock's
 * details in lt_denied.
 */
__be32
nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
	    struct nfsd4_lockt *lockt)
{
	struct file_lock *file_lock = NULL;
	struct nfs4_lockowner *lo = NULL;
	__be32 status;
	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);

	if (locks_in_grace(SVC_NET(rqstp)))
		return nfserr_grace;

	if (check_lock_length(lockt->lt_offset, lockt->lt_length))
		 return nfserr_inval;

	if (!nfsd4_has_session(cstate)) {
		/* 4.0 only: 4.1+ identifies the client via the session */
		status = lookup_clientid(&lockt->lt_clientid, cstate, nn);
		if (status)
			goto out;
	}

	if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0)))
		goto out;

	file_lock = locks_alloc_lock();
	if (!file_lock) {
		dprintk("NFSD: %s: unable to allocate lock!\n", __func__);
		status = nfserr_jukebox;
		goto out;
	}
	locks_init_lock(file_lock);
	switch (lockt->lt_type) {
		case NFS4_READ_LT:
		case NFS4_READW_LT:
			file_lock->fl_type = F_RDLCK;
		break;
		case NFS4_WRITE_LT:
		case NFS4_WRITEW_LT:
			file_lock->fl_type = F_WRLCK;
		break;
		default:
			dprintk("NFSD: nfs4_lockt: bad lock type!\n");
			status = nfserr_inval;
		goto out;
	}

	/* fl_owner stays NULL if the client has no such lockowner yet */
	lo = find_lockowner_str(&lockt->lt_clientid, &lockt->lt_owner,
				cstate->clp);
	if (lo)
		file_lock->fl_owner = (fl_owner_t)lo;
	file_lock->fl_pid = current->tgid;
	file_lock->fl_flags = FL_POSIX;

	file_lock->fl_start = lockt->lt_offset;
	file_lock->fl_end = last_byte_offset(lockt->lt_offset, lockt->lt_length);

	nfs4_transform_lock_offset(file_lock);

	status = nfsd_test_lock(rqstp, &cstate->current_fh, file_lock);
	if (status)
		goto out;

	/* vfs_test_lock sets fl_type to F_UNLCK when no conflict exists */
	if (file_lock->fl_type != F_UNLCK) {
		status = nfserr_denied;
		nfs4_set_lock_denied(file_lock, &lockt->lt_denied);
	}
out:
	if (lo)
		nfs4_put_stateowner(&lo->lo_owner);
	if (file_lock)
		locks_free_lock(file_lock);
	return status;
}

5401
/*
 * LOCKU operation: release a byte-range lock by issuing an F_UNLCK
 * request through the VFS for the range held under the given lock
 * stateid.
 */
__be32
nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
	    struct nfsd4_locku *locku)
{
	struct nfs4_ol_stateid *stp;
	struct file *filp = NULL;
	struct file_lock *file_lock = NULL;
	__be32 status;
	int err;
	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);

	dprintk("NFSD: nfsd4_locku: start=%Ld length=%Ld\n",
		(long long) locku->lu_offset,
		(long long) locku->lu_length);

	if (check_lock_length(locku->lu_offset, locku->lu_length))
		 return nfserr_inval;

	status = nfs4_preprocess_seqid_op(cstate, locku->lu_seqid,
					&locku->lu_stateid, NFS4_LOCK_STID,
					&stp, nn);
	if (status)
		goto out;
	filp = find_any_file(stp->st_stid.sc_file);
	if (!filp) {
		status = nfserr_lock_range;
		goto put_stateid;
	}
	file_lock = locks_alloc_lock();
	if (!file_lock) {
		dprintk("NFSD: %s: unable to allocate lock!\n", __func__);
		status = nfserr_jukebox;
		goto fput;
	}
	locks_init_lock(file_lock);
	file_lock->fl_type = F_UNLCK;
	file_lock->fl_owner = (fl_owner_t)lockowner(stp->st_stateowner);
	file_lock->fl_pid = current->tgid;
	file_lock->fl_file = filp;
	file_lock->fl_flags = FL_POSIX;
	file_lock->fl_lmops = &nfsd_posix_mng_ops;
	file_lock->fl_start = locku->lu_offset;

	file_lock->fl_end = last_byte_offset(locku->lu_offset,
						locku->lu_length);
	nfs4_transform_lock_offset(file_lock);

	err = vfs_lock_file(filp, F_SETLK, file_lock, NULL);
	if (err) {
		dprintk("NFSD: nfs4_locku: vfs_lock_file failed!\n");
		goto out_nfserr;
	}
	update_stateid(&stp->st_stid.sc_stateid);
	memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
fput:
	fput(filp);
put_stateid:
	nfs4_put_stid(&stp->st_stid);
out:
	nfsd4_bump_seqid(cstate, status);
	if (file_lock)
		locks_free_lock(file_lock);
	return status;

out_nfserr:
	status = nfserrno(err);
	goto fput;
}

/*
 * returns
 * 	true:  locks held by lockowner
 * 	false: no locks held by lockowner
 *
 * Walks the inode's lock list looking for any file_lock whose fl_owner is
 * this lockowner.
 */
static bool
check_for_locks(struct nfs4_file *fp, struct nfs4_lockowner *lowner)
{
	struct file_lock **flpp;
	int status = false;
	struct file *filp = find_any_file(fp);
	struct inode *inode;

	if (!filp) {
		/* Any valid lock stateid should have some sort of access */
		WARN_ON_ONCE(1);
		return status;
	}

	inode = file_inode(filp);

	spin_lock(&inode->i_lock);
	for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) {
		if ((*flpp)->fl_owner == (fl_owner_t)lowner) {
			status = true;
			break;
		}
	}
	spin_unlock(&inode->i_lock);
	fput(filp);
	return status;
}

5503
/*
 * RELEASE_LOCKOWNER operation: free a lockowner and its stateids, but
 * only if no locks are still held under any of its lock stateids
 * (otherwise nfserr_locks_held is returned).
 */
__be32
nfsd4_release_lockowner(struct svc_rqst *rqstp,
			struct nfsd4_compound_state *cstate,
			struct nfsd4_release_lockowner *rlockowner)
{
	clientid_t *clid = &rlockowner->rl_clientid;
	struct nfs4_stateowner *sop;
	struct nfs4_lockowner *lo = NULL;
	struct nfs4_ol_stateid *stp;
	struct xdr_netobj *owner = &rlockowner->rl_owner;
	unsigned int hashval = ownerstr_hashval(owner);
	__be32 status;
	struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
	struct nfs4_client *clp;

	dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
		clid->cl_boot, clid->cl_id);

	status = lookup_clientid(clid, cstate, nn);
	if (status)
		return status;

	clp = cstate->clp;
	/* Find the matching lock stateowner */
	spin_lock(&clp->cl_lock);
	list_for_each_entry(sop, &clp->cl_ownerstr_hashtbl[hashval],
			    so_strhash) {

		if (sop->so_is_open_owner || !same_owner_str(sop, owner))
			continue;

		/* see if there are still any locks associated with it */
		lo = lockowner(sop);
		list_for_each_entry(stp, &sop->so_stateids, st_perstateowner) {
			if (check_for_locks(stp->st_stid.sc_file, lo)) {
				status = nfserr_locks_held;
				spin_unlock(&clp->cl_lock);
				return status;
			}
		}

		/* reference handed to release_lockowner() below */
		atomic_inc(&sop->so_count);
		break;
	}
	spin_unlock(&clp->cl_lock);
	if (lo)
		release_lockowner(lo);
	return status;
}

/* Allocate an uninitialized reclaim record; caller fills it in. */
static inline struct nfs4_client_reclaim *
alloc_reclaim(void)
{
	return kmalloc(sizeof(struct nfs4_client_reclaim), GFP_KERNEL);
}

5559
/*
 * Has the named client already reclaimed its state this grace period?
 * True iff a reclaim record exists and has a client attached (cr_clp).
 */
bool
nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn)
{
	struct nfs4_client_reclaim *crp;

	crp = nfsd4_find_reclaim_client(name, nn);
	return (crp && crp->cr_clp);
}

L
Linus Torvalds 已提交
5568 5569 5570
/*
 * failure => all reset bets are off, nfserr_no_grace...
 *
 * Create a reclaim record for the given recovery-directory name and hash
 * it into the per-net reclaim table.  Returns the record, or NULL on
 * allocation failure.
 */
struct nfs4_client_reclaim *
nfs4_client_to_reclaim(const char *name, struct nfsd_net *nn)
{
	unsigned int strhashval;
	struct nfs4_client_reclaim *crp;

	dprintk("NFSD nfs4_client_to_reclaim NAME: %.*s\n", HEXDIR_LEN, name);
	crp = alloc_reclaim();
	if (crp) {
		strhashval = clientstr_hashval(name);
		INIT_LIST_HEAD(&crp->cr_strhash);
		list_add(&crp->cr_strhash, &nn->reclaim_str_hashtbl[strhashval]);
		memcpy(crp->cr_recdir, name, HEXDIR_LEN);
		crp->cr_clp = NULL;
		nn->reclaim_str_hashtbl_size++;
	}
	return crp;
}

5590
/* Unhash and free one reclaim record, keeping the table size in sync. */
void
nfs4_remove_reclaim_record(struct nfs4_client_reclaim *crp, struct nfsd_net *nn)
{
	list_del(&crp->cr_strhash);
	kfree(crp);
	nn->reclaim_str_hashtbl_size--;
}

5598
/* Free every reclaim record in the per-net table. */
void
nfs4_release_reclaim(struct nfsd_net *nn)
{
	struct nfs4_client_reclaim *crp = NULL;
	int i;

	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
		while (!list_empty(&nn->reclaim_str_hashtbl[i])) {
			crp = list_entry(nn->reclaim_str_hashtbl[i].next,
			                struct nfs4_client_reclaim, cr_strhash);
			nfs4_remove_reclaim_record(crp, nn);
		}
	}
	/* size counter and table contents must agree once we're done */
	WARN_ON_ONCE(nn->reclaim_str_hashtbl_size);
}

/*
 * called from OPEN, CLAIM_PREVIOUS with a new clientid.
 * Look up a reclaim record by recovery-directory name; NULL if absent.
 */
struct nfs4_client_reclaim *
nfsd4_find_reclaim_client(const char *recdir, struct nfsd_net *nn)
{
	unsigned int strhashval;
	struct nfs4_client_reclaim *crp = NULL;

	dprintk("NFSD: nfs4_find_reclaim_client for recdir %s\n", recdir);

	strhashval = clientstr_hashval(recdir);
	list_for_each_entry(crp, &nn->reclaim_str_hashtbl[strhashval], cr_strhash) {
		if (same_name(crp->cr_recdir, recdir)) {
			return crp;
		}
	}
	return NULL;
}

/*
* Called from OPEN. Look for clientid in reclaim list.
* Returns nfs_ok if the client is known and has a recovery record,
* nfserr_reclaim_bad otherwise.
*/
__be32
nfs4_check_open_reclaim(clientid_t *clid,
		struct nfsd4_compound_state *cstate,
		struct nfsd_net *nn)
{
	__be32 status;

	/* find clientid in conf_id_hashtbl */
	status = lookup_clientid(clid, cstate, nn);
	if (status)
		return nfserr_reclaim_bad;

	if (nfsd4_client_record_check(cstate->clp))
		return nfserr_reclaim_bad;

	return nfs_ok;
}

B
Bryan Schumaker 已提交
5654
#ifdef CONFIG_NFSD_FAULT_INJECTION
5655 5656 5657 5658 5659 5660
/* Drop a fault-injection reference taken on a client (cl_refcount). */
static inline void
put_client(struct nfs4_client *clp)
{
	atomic_dec(&clp->cl_refcount);
}

5661 5662 5663 5664 5665 5666 5667 5668 5669 5670 5671 5672 5673 5674 5675 5676 5677
/*
 * Find a client on the LRU list by socket address (exact byte compare of
 * the first addr_size bytes).  Returns NULL if the net namespace isn't
 * initialized or no client matches.  Caller is expected to hold
 * nn->client_lock (the callers below do).
 */
static struct nfs4_client *
nfsd_find_client(struct sockaddr_storage *addr, size_t addr_size)
{
	struct nfs4_client *clp;
	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
					  nfsd_net_id);

	if (!nfsd_netns_ready(nn))
		return NULL;

	list_for_each_entry(clp, &nn->client_lru, cl_lru) {
		if (memcmp(&clp->cl_addr, addr, addr_size) == 0)
			return clp;
	}
	return NULL;
}

5678
/* Fault injection: log every known client's address; returns the count. */
u64
nfsd_inject_print_clients(void)
{
	struct nfs4_client *clp;
	u64 count = 0;
	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
					  nfsd_net_id);
	char buf[INET6_ADDRSTRLEN];

	if (!nfsd_netns_ready(nn))
		return 0;

	spin_lock(&nn->client_lock);
	list_for_each_entry(clp, &nn->client_lru, cl_lru) {
		rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf));
		pr_info("NFS Client: %s\n", buf);
		++count;
	}
	spin_unlock(&nn->client_lock);

	return count;
}
B
Bryan Schumaker 已提交
5700

5701
/*
 * Fault injection: expire the client with the given address.  The client
 * is marked expired under client_lock, then actually expired outside it
 * (expire_client may sleep).  Returns 1 if a client was expired, else 0.
 */
u64
nfsd_inject_forget_client(struct sockaddr_storage *addr, size_t addr_size)
{
	u64 count = 0;
	struct nfs4_client *clp;
	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
					  nfsd_net_id);

	if (!nfsd_netns_ready(nn))
		return count;

	spin_lock(&nn->client_lock);
	clp = nfsd_find_client(addr, addr_size);
	if (clp) {
		if (mark_client_expired_locked(clp) == nfs_ok)
			++count;
		else
			clp = NULL;
	}
	spin_unlock(&nn->client_lock);

	if (clp)
		expire_client(clp);

	return count;
}

5728
/*
 * Fault injection: expire up to 'max' clients (all of them when max == 0).
 * Candidates are collected onto a private list under client_lock, then
 * expired outside the lock.  Returns how many were expired.
 */
u64
nfsd_inject_forget_clients(u64 max)
{
	u64 count = 0;
	struct nfs4_client *clp, *next;
	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
						nfsd_net_id);
	LIST_HEAD(reaplist);

	if (!nfsd_netns_ready(nn))
		return count;

	spin_lock(&nn->client_lock);
	list_for_each_entry_safe(clp, next, &nn->client_lru, cl_lru) {
		if (mark_client_expired_locked(clp) == nfs_ok) {
			list_add(&clp->cl_lru, &reaplist);
			if (max != 0 && ++count >= max)
				break;
		}
	}
	spin_unlock(&nn->client_lock);

	list_for_each_entry_safe(clp, next, &reaplist, cl_lru)
		expire_client(clp);

	return count;
}

5756 5757 5758 5759
/* Log "<client addr> has <count> <type>" for the fault-injection code. */
static void nfsd_print_count(struct nfs4_client *clp, unsigned int count,
			     const char *type)
{
	char buf[INET6_ADDRSTRLEN];
	rpc_ntop((struct sockaddr *)&clp->cl_addr, buf, sizeof(buf));
	printk(KERN_INFO "NFS Client: %s has %u %s\n", buf, count, type);
}

5764 5765 5766 5767 5768 5769 5770 5771 5772 5773 5774 5775 5776 5777 5778 5779
/*
 * Queue a lock stateid on the fault-injection collect list, pinning its
 * client (cl_refcount) so it can't disappear before the reap step.
 * No-op when no collect list was supplied.  Requires nn->client_lock.
 */
static void
nfsd_inject_add_lock_to_list(struct nfs4_ol_stateid *lst,
			     struct list_head *collect)
{
	struct nfs4_client *clp = lst->st_stid.sc_client;
	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
					  nfsd_net_id);

	if (!collect)
		return;

	lockdep_assert_held(&nn->client_lock);
	atomic_inc(&clp->cl_refcount);
	list_add(&lst->st_locks, collect);
}

5780
/*
 * Walk every lock stateid of a client (openowner -> open stateid -> lock
 * stateid), optionally applying 'func' to each and collecting it for later
 * reaping.  Stops after 'max' stateids unless max == 0.  Returns how many
 * were visited.  Runs entirely under clp->cl_lock.
 */
static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max,
				    struct list_head *collect,
				    void (*func)(struct nfs4_ol_stateid *))
{
	struct nfs4_openowner *oop;
	struct nfs4_ol_stateid *stp, *st_next;
	struct nfs4_ol_stateid *lst, *lst_next;
	u64 count = 0;

	spin_lock(&clp->cl_lock);
	list_for_each_entry(oop, &clp->cl_openowners, oo_perclient) {
		list_for_each_entry_safe(stp, st_next,
				&oop->oo_owner.so_stateids, st_perstateowner) {
			list_for_each_entry_safe(lst, lst_next,
					&stp->st_locks, st_locks) {
				if (func) {
					func(lst);
					nfsd_inject_add_lock_to_list(lst,
								collect);
				}
				++count;
				/*
				 * Despite the fact that these functions deal
				 * with 64-bit integers for "count", we must
				 * ensure that it doesn't blow up the
				 * clp->cl_refcount. Throw a warning if we
				 * start to approach INT_MAX here.
				 */
				WARN_ON_ONCE(count == (INT_MAX / 2));
				if (count == max)
					goto out;
			}
		}
	}
out:
	spin_unlock(&clp->cl_lock);

	return count;
}

5820 5821 5822
/* Unhash up to 'max' of a client's lock stateids, collecting them for reap. */
static u64
nfsd_collect_client_locks(struct nfs4_client *clp, struct list_head *collect,
			  u64 max)
{
	return nfsd_foreach_client_lock(clp, max, collect, unhash_lock_stateid);
}

5827 5828
/* Count @clp's lock stateids without touching them, and log the total. */
static u64
nfsd_print_client_locks(struct nfs4_client *clp)
{
	u64 count = nfsd_foreach_client_lock(clp, 0, NULL, NULL);
	nfsd_print_count(clp, count, "locked files");
	return count;
}

5835
/*
 * Fault-injection entry point: log the number of lock stateids held by
 * every active client in this net namespace and return the grand total.
 */
u64
nfsd_inject_print_locks(void)
{
	struct nfs4_client *clp;
	u64 count = 0;
	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
						nfsd_net_id);

	if (!nfsd_netns_ready(nn))
		return 0;

	/* client_lock keeps the client_lru list stable during the walk */
	spin_lock(&nn->client_lock);
	list_for_each_entry(clp, &nn->client_lru, cl_lru)
		count += nfsd_print_client_locks(clp);
	spin_unlock(&nn->client_lock);

	return count;
}

/*
 * Release every lock stateid previously collected by
 * nfsd_collect_client_locks(), dropping the stateid reference and the
 * client reference taken when it was queued.  Runs without locks held.
 */
static void
nfsd_reap_locks(struct list_head *reaplist)
{
	struct nfs4_client *clp;
	struct nfs4_ol_stateid *stp, *next;

	list_for_each_entry_safe(stp, next, reaplist, st_locks) {
		list_del_init(&stp->st_locks);
		clp = stp->st_stid.sc_client;
		nfs4_put_stid(&stp->st_stid);
		put_client(clp);
	}
}

u64
5869
nfsd_inject_forget_client_locks(struct sockaddr_storage *addr, size_t addr_size)
5870 5871 5872 5873 5874 5875 5876 5877 5878 5879 5880 5881 5882 5883 5884 5885 5886 5887 5888 5889
{
	unsigned int count = 0;
	struct nfs4_client *clp;
	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
						nfsd_net_id);
	LIST_HEAD(reaplist);

	if (!nfsd_netns_ready(nn))
		return count;

	spin_lock(&nn->client_lock);
	clp = nfsd_find_client(addr, addr_size);
	if (clp)
		count = nfsd_collect_client_locks(clp, &reaplist, 0);
	spin_unlock(&nn->client_lock);
	nfsd_reap_locks(&reaplist);
	return count;
}

/*
 * Fault-injection entry point: forget up to @max lock stateids across
 * all clients (0 == all).  Returns the number forgotten.
 */
u64
nfsd_inject_forget_locks(u64 max)
{
	u64 count = 0;
	struct nfs4_client *clp;
	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
						nfsd_net_id);
	LIST_HEAD(reaplist);

	if (!nfsd_netns_ready(nn))
		return count;

	spin_lock(&nn->client_lock);
	list_for_each_entry(clp, &nn->client_lru, cl_lru) {
		count += nfsd_collect_client_locks(clp, &reaplist, max - count);
		if (max != 0 && count >= max)
			break;
	}
	spin_unlock(&nn->client_lock);
	/* reap outside client_lock */
	nfsd_reap_locks(&reaplist);
	return count;
}

5912 5913 5914 5915
/*
 * Walk @clp's openowners under clp->cl_lock, applying @func to each and
 * (if @collect is non-NULL) moving it onto @collect with an extra client
 * reference, up to @max entries (0 == no limit).  Returns the number
 * visited.  Caller must hold nn->client_lock.
 */
static u64
nfsd_foreach_client_openowner(struct nfs4_client *clp, u64 max,
			      struct list_head *collect,
			      void (*func)(struct nfs4_openowner *))
{
	struct nfs4_openowner *oop, *next;
	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
						nfsd_net_id);
	u64 count = 0;

	lockdep_assert_held(&nn->client_lock);

	spin_lock(&clp->cl_lock);
	list_for_each_entry_safe(oop, next, &clp->cl_openowners, oo_perclient) {
		if (func) {
			func(oop);
			if (collect) {
				/* pinned until the reaper calls put_client() */
				atomic_inc(&clp->cl_refcount);
				list_add(&oop->oo_perclient, collect);
			}
		}
		++count;
		/*
		 * Despite the fact that these functions deal with
		 * 64-bit integers for "count", we must ensure that
		 * it doesn't blow up the clp->cl_refcount. Throw a
		 * warning if we start to approach INT_MAX here.
		 */
		WARN_ON_ONCE(count == (INT_MAX / 2));
		if (count == max)
			break;
	}
	spin_unlock(&clp->cl_lock);

	return count;
}

/* Count @clp's openowners without touching them, and log the total. */
static u64
nfsd_print_client_openowners(struct nfs4_client *clp)
{
	u64 count = nfsd_foreach_client_openowner(clp, 0, NULL, NULL);

	nfsd_print_count(clp, count, "openowners");
	return count;
}

/*
 * Unhash up to @max openowners of @clp and collect them (each with a
 * client reference) on @collect for the caller to reap.
 */
static u64
nfsd_collect_client_openowners(struct nfs4_client *clp,
			       struct list_head *collect, u64 max)
{
	return nfsd_foreach_client_openowner(clp, max, collect,
						unhash_openowner_locked);
}

/*
 * Fault-injection entry point: log the number of openowners held by
 * every active client in this net namespace and return the grand total.
 */
u64
nfsd_inject_print_openowners(void)
{
	struct nfs4_client *clp;
	u64 count = 0;
	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
						nfsd_net_id);

	if (!nfsd_netns_ready(nn))
		return 0;

	spin_lock(&nn->client_lock);
	list_for_each_entry(clp, &nn->client_lru, cl_lru)
		count += nfsd_print_client_openowners(clp);
	spin_unlock(&nn->client_lock);

	return count;
}

5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999
/*
 * Release every openowner previously collected by
 * nfsd_collect_client_openowners(), dropping the client reference taken
 * when it was queued.  Runs without locks held.
 */
static void
nfsd_reap_openowners(struct list_head *reaplist)
{
	struct nfs4_client *clp;
	struct nfs4_openowner *oop, *next;

	list_for_each_entry_safe(oop, next, reaplist, oo_perclient) {
		list_del_init(&oop->oo_perclient);
		clp = oop->oo_owner.so_client;
		release_openowner(oop);
		put_client(clp);
	}
}

u64
6000 6001
nfsd_inject_forget_client_openowners(struct sockaddr_storage *addr,
				     size_t addr_size)
6002
{
6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016 6017 6018
	unsigned int count = 0;
	struct nfs4_client *clp;
	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
						nfsd_net_id);
	LIST_HEAD(reaplist);

	if (!nfsd_netns_ready(nn))
		return count;

	spin_lock(&nn->client_lock);
	clp = nfsd_find_client(addr, addr_size);
	if (clp)
		count = nfsd_collect_client_openowners(clp, &reaplist, 0);
	spin_unlock(&nn->client_lock);
	nfsd_reap_openowners(&reaplist);
	return count;
6019 6020
}

6021
/*
 * Fault-injection entry point: forget up to @max openowners across all
 * clients (0 == all).  Returns the number forgotten.
 */
u64
nfsd_inject_forget_openowners(u64 max)
{
	u64 count = 0;
	struct nfs4_client *clp;
	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
						nfsd_net_id);
	LIST_HEAD(reaplist);

	if (!nfsd_netns_ready(nn))
		return count;

	spin_lock(&nn->client_lock);
	list_for_each_entry(clp, &nn->client_lru, cl_lru) {
		count += nfsd_collect_client_openowners(clp, &reaplist,
							max - count);
		if (max != 0 && count >= max)
			break;
	}
	spin_unlock(&nn->client_lock);
	/* reap outside client_lock */
	nfsd_reap_openowners(&reaplist);
	return count;
}

6045 6046 6047 6048
/*
 * Walk @clp's delegations under state_lock, counting up to @max (0 ==
 * no limit).  If @victims is non-NULL, each eligible delegation is
 * unhashed and moved onto @victims with an extra client reference for
 * the caller to reap/recall.  Returns the number counted.
 * Caller must hold nn->client_lock.
 */
static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max,
				     struct list_head *victims)
{
	struct nfs4_delegation *dp, *next;
	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
						nfsd_net_id);
	u64 count = 0;

	lockdep_assert_held(&nn->client_lock);

	spin_lock(&state_lock);
	list_for_each_entry_safe(dp, next, &clp->cl_delegations, dl_perclnt) {
		if (victims) {
			/*
			 * It's not safe to mess with delegations that have a
			 * non-zero dl_time. They might have already been broken
			 * and could be processed by the laundromat outside of
			 * the state_lock. Just leave them be.
			 */
			if (dp->dl_time != 0)
				continue;

			/* pinned until the reaper calls put_client() */
			atomic_inc(&clp->cl_refcount);
			unhash_delegation_locked(dp);
			list_add(&dp->dl_recall_lru, victims);
		}
		++count;
		/*
		 * Despite the fact that these functions deal with
		 * 64-bit integers for "count", we must ensure that
		 * it doesn't blow up the clp->cl_refcount. Throw a
		 * warning if we start to approach INT_MAX here.
		 */
		WARN_ON_ONCE(count == (INT_MAX / 2));
		if (count == max)
			break;
	}
	spin_unlock(&state_lock);
	return count;
}

6086 6087
/* Count @clp's delegations without touching them, and log the total. */
static u64
nfsd_print_client_delegations(struct nfs4_client *clp)
{
	u64 count = nfsd_find_all_delegations(clp, 0, NULL);

	nfsd_print_count(clp, count, "delegations");
	return count;
}

/*
 * Fault-injection entry point: log the number of delegations held by
 * every active client in this net namespace and return the grand total.
 */
u64
nfsd_inject_print_delegations(void)
{
	struct nfs4_client *clp;
	u64 count = 0;
	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
						nfsd_net_id);

	if (!nfsd_netns_ready(nn))
		return 0;

	spin_lock(&nn->client_lock);
	list_for_each_entry(clp, &nn->client_lru, cl_lru)
		count += nfsd_print_client_delegations(clp);
	spin_unlock(&nn->client_lock);

	return count;
}

/*
 * Revoke every delegation previously collected by
 * nfsd_find_all_delegations(), dropping the client reference taken when
 * it was queued.  Runs without locks held.
 */
static void
nfsd_forget_delegations(struct list_head *reaplist)
{
	struct nfs4_client *clp;
	struct nfs4_delegation *dp, *next;

	list_for_each_entry_safe(dp, next, reaplist, dl_recall_lru) {
		list_del_init(&dp->dl_recall_lru);
		clp = dp->dl_stid.sc_client;
		revoke_delegation(dp);
		put_client(clp);
	}
}

/*
 * Fault-injection entry point: revoke all delegations of the client
 * matching @addr.  Returns the number revoked.
 */
u64
nfsd_inject_forget_client_delegations(struct sockaddr_storage *addr,
				      size_t addr_size)
{
	u64 count = 0;
	struct nfs4_client *clp;
	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
						nfsd_net_id);
	LIST_HEAD(reaplist);

	if (!nfsd_netns_ready(nn))
		return count;

	spin_lock(&nn->client_lock);
	clp = nfsd_find_client(addr, addr_size);
	if (clp)
		count = nfsd_find_all_delegations(clp, 0, &reaplist);
	spin_unlock(&nn->client_lock);

	/* revoke outside client_lock */
	nfsd_forget_delegations(&reaplist);
	return count;
}
6150

6151
/*
 * Fault-injection entry point: revoke up to @max delegations across all
 * clients (0 == all).  Returns the number revoked.
 */
u64
nfsd_inject_forget_delegations(u64 max)
{
	u64 count = 0;
	struct nfs4_client *clp;
	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
						nfsd_net_id);
	LIST_HEAD(reaplist);

	if (!nfsd_netns_ready(nn))
		return count;

	spin_lock(&nn->client_lock);
	list_for_each_entry(clp, &nn->client_lru, cl_lru) {
		count += nfsd_find_all_delegations(clp, max - count, &reaplist);
		if (max != 0 && count >= max)
			break;
	}
	spin_unlock(&nn->client_lock);
	/* revoke outside client_lock */
	nfsd_forget_delegations(&reaplist);
	return count;
}

6174 6175
/*
 * Issue a recall (CB_RECALL) for every delegation previously collected
 * by nfsd_find_all_delegations(), dropping the client reference taken
 * when it was queued.  Runs without locks held (state_lock is taken
 * briefly per entry).
 */
static void
nfsd_recall_delegations(struct list_head *reaplist)
{
	struct nfs4_client *clp;
	struct nfs4_delegation *dp, *next;

	list_for_each_entry_safe(dp, next, reaplist, dl_recall_lru) {
		list_del_init(&dp->dl_recall_lru);
		clp = dp->dl_stid.sc_client;
		/*
		 * We skipped all entries that had a zero dl_time before,
		 * so we can now reset the dl_time back to 0. If a delegation
		 * break comes in now, then it won't make any difference since
		 * we're recalling it either way.
		 */
		spin_lock(&state_lock);
		dp->dl_time = 0;
		spin_unlock(&state_lock);
		nfsd_break_one_deleg(dp);
		put_client(clp);
	}
}
6196

6197
/*
 * Fault-injection entry point: recall all delegations of the client
 * matching @addr.  Returns the number recalled.
 */
u64
nfsd_inject_recall_client_delegations(struct sockaddr_storage *addr,
				      size_t addr_size)
{
	u64 count = 0;
	struct nfs4_client *clp;
	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
						nfsd_net_id);
	LIST_HEAD(reaplist);

	if (!nfsd_netns_ready(nn))
		return count;

	spin_lock(&nn->client_lock);
	clp = nfsd_find_client(addr, addr_size);
	if (clp)
		count = nfsd_find_all_delegations(clp, 0, &reaplist);
	spin_unlock(&nn->client_lock);

	/* issue the recalls outside client_lock */
	nfsd_recall_delegations(&reaplist);
	return count;
}

6220
u64
6221
nfsd_inject_recall_delegations(u64 max)
6222 6223
{
	u64 count = 0;
6224 6225 6226 6227
	struct nfs4_client *clp, *next;
	struct nfsd_net *nn = net_generic(current->nsproxy->net_ns,
						nfsd_net_id);
	LIST_HEAD(reaplist);
6228

6229 6230
	if (!nfsd_netns_ready(nn))
		return count;
6231

6232 6233 6234 6235 6236 6237 6238 6239
	spin_lock(&nn->client_lock);
	list_for_each_entry_safe(clp, next, &nn->client_lru, cl_lru) {
		count += nfsd_find_all_delegations(clp, max - count, &reaplist);
		if (max != 0 && ++count >= max)
			break;
	}
	spin_unlock(&nn->client_lock);
	nfsd_recall_delegations(&reaplist);
6240 6241
	return count;
}
B
Bryan Schumaker 已提交
6242 6243
#endif /* CONFIG_NFSD_FAULT_INJECTION */

6244 6245 6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256 6257 6258 6259 6260 6261 6262 6263 6264
/*
 * Since the lifetime of a delegation isn't limited to that of an open, a
 * client may quite reasonably hang on to a delegation as long as it has
 * the inode cached.  This becomes an obvious problem the first time a
 * client's inode cache approaches the size of the server's total memory.
 *
 * For now we avoid this problem by imposing a hard limit on the number
 * of delegations, which varies according to the server's memory size.
 */
static void
set_max_delegations(void)
{
	/*
	 * Allow at most 4 delegations per megabyte of RAM.  Quick
	 * estimates suggest that in the worst case (where every delegation
	 * is for a different inode), a delegation could take about 1.5K,
	 * giving a worst case usage of about 6% of memory.
	 */
	max_delegations = nr_free_buffer_pages() >> (20 - 2 - PAGE_SHIFT);
}

6265
static int nfs4_state_create_net(struct net *net)
6266 6267 6268 6269 6270 6271 6272
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
	int i;

	nn->conf_id_hashtbl = kmalloc(sizeof(struct list_head) *
			CLIENT_HASH_SIZE, GFP_KERNEL);
	if (!nn->conf_id_hashtbl)
6273
		goto err;
6274 6275 6276 6277
	nn->unconf_id_hashtbl = kmalloc(sizeof(struct list_head) *
			CLIENT_HASH_SIZE, GFP_KERNEL);
	if (!nn->unconf_id_hashtbl)
		goto err_unconf_id;
6278 6279 6280 6281
	nn->sessionid_hashtbl = kmalloc(sizeof(struct list_head) *
			SESSION_HASH_SIZE, GFP_KERNEL);
	if (!nn->sessionid_hashtbl)
		goto err_sessionid;
6282

6283
	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
6284
		INIT_LIST_HEAD(&nn->conf_id_hashtbl[i]);
6285
		INIT_LIST_HEAD(&nn->unconf_id_hashtbl[i]);
6286
	}
6287 6288
	for (i = 0; i < SESSION_HASH_SIZE; i++)
		INIT_LIST_HEAD(&nn->sessionid_hashtbl[i]);
6289
	nn->conf_name_tree = RB_ROOT;
6290
	nn->unconf_name_tree = RB_ROOT;
6291
	INIT_LIST_HEAD(&nn->client_lru);
6292
	INIT_LIST_HEAD(&nn->close_lru);
6293
	INIT_LIST_HEAD(&nn->del_recall_lru);
6294
	spin_lock_init(&nn->client_lock);
6295

6296
	INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
6297
	get_net(net);
6298

6299
	return 0;
6300

6301
err_sessionid:
6302
	kfree(nn->unconf_id_hashtbl);
6303 6304
err_unconf_id:
	kfree(nn->conf_id_hashtbl);
6305 6306
err:
	return -ENOMEM;
6307 6308 6309
}

/*
 * Tear down the per-net-namespace NFSv4 state created by
 * nfs4_state_create_net(): destroy every remaining confirmed and
 * unconfirmed client, free the hash tables, and drop the net reference.
 */
static void
nfs4_state_destroy_net(struct net *net)
{
	int i;
	struct nfs4_client *clp = NULL;
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);

	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
		while (!list_empty(&nn->conf_id_hashtbl[i])) {
			clp = list_entry(nn->conf_id_hashtbl[i].next, struct nfs4_client, cl_idhash);
			destroy_client(clp);
		}
	}

	for (i = 0; i < CLIENT_HASH_SIZE; i++) {
		while (!list_empty(&nn->unconf_id_hashtbl[i])) {
			clp = list_entry(nn->unconf_id_hashtbl[i].next, struct nfs4_client, cl_idhash);
			destroy_client(clp);
		}
	}

	kfree(nn->sessionid_hashtbl);
	kfree(nn->unconf_id_hashtbl);
	kfree(nn->conf_id_hashtbl);
	/* balances the get_net() in nfs4_state_create_net() */
	put_net(net);
}

6336
/*
 * Per-net-namespace NFSv4 state startup: allocate the state tables,
 * initialize client tracking, start the grace period and schedule the
 * first laundromat run.  Returns 0 or a negative errno.
 */
int
nfs4_state_start_net(struct net *net)
{
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
	int ret;

	ret = nfs4_state_create_net(net);
	if (ret)
		return ret;
	nfsd4_client_tracking_init(net);
	nn->boot_time = get_seconds();
	locks_start_grace(net, &nn->nfsd4_manager);
	nn->grace_ended = false;
	printk(KERN_INFO "NFSD: starting %ld-second grace period (net %p)\n",
	       nn->nfsd4_grace, net);
	/* first laundromat run after the grace period expires */
	queue_delayed_work(laundry_wq, &nn->laundromat_work, nn->nfsd4_grace * HZ);
	return 0;
}

/* initialization to perform when the nfsd service is started: */

/*
 * Global (non-per-net) NFSv4 startup: set up the callback credential,
 * the laundromat workqueue and the callback queue, and compute the
 * delegation limit.  Returns 0 or a negative errno, undoing earlier
 * steps on failure.
 */
int
nfs4_state_start(void)
{
	int ret;

	ret = set_callback_cred();
	if (ret)
		return -ENOMEM;
	laundry_wq = create_singlethread_workqueue("nfsd4");
	if (laundry_wq == NULL) {
		ret = -ENOMEM;
		goto out_recovery;
	}
	ret = nfsd4_create_callback_queue();
	if (ret)
		goto out_free_laundry;

	set_max_delegations();

	return 0;

out_free_laundry:
	destroy_workqueue(laundry_wq);
out_recovery:
	return ret;
}

/*
 * Per-net-namespace NFSv4 shutdown: stop the laundromat, end the grace
 * period, release every delegation on the recall LRU, then tear down
 * client tracking and the state tables.
 */
void
nfs4_state_shutdown_net(struct net *net)
{
	struct nfs4_delegation *dp = NULL;
	struct list_head *pos, *next, reaplist;
	struct nfsd_net *nn = net_generic(net, nfsd_net_id);

	cancel_delayed_work_sync(&nn->laundromat_work);
	locks_end_grace(&nn->nfsd4_manager);

	/* phase 1: unhash under state_lock, moving entries to a local list */
	INIT_LIST_HEAD(&reaplist);
	spin_lock(&state_lock);
	list_for_each_safe(pos, next, &nn->del_recall_lru) {
		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
		unhash_delegation_locked(dp);
		list_add(&dp->dl_recall_lru, &reaplist);
	}
	spin_unlock(&state_lock);
	/* phase 2: drop the stateid references outside the lock */
	list_for_each_safe(pos, next, &reaplist) {
		dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
		list_del_init(&dp->dl_recall_lru);
		nfs4_put_stid(&dp->dl_stid);
	}

	nfsd4_client_tracking_exit(net);
	nfs4_state_destroy_net(net);
}

/*
 * Global NFSv4 shutdown: undo nfs4_state_start() — destroy the
 * laundromat workqueue and the callback queue.
 */
void
nfs4_state_shutdown(void)
{
	destroy_workqueue(laundry_wq);
	nfsd4_destroy_callback_queue();
}
6418 6419 6420 6421

/*
 * If the compound has a saved current stateid and @stateid is the
 * special "current" value, replace it with the saved one.
 */
static void
get_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid)
{
	if (HAS_STATE_ID(cstate, CURRENT_STATE_ID_FLAG) && CURRENT_STATEID(stateid))
		memcpy(stateid, &cstate->current_stateid, sizeof(stateid_t));
}

/*
 * Save @stateid as the compound's current stateid.  Only NFSv4.1+
 * (minorversion != 0) defines the current-stateid mechanism.
 */
static void
put_stateid(struct nfsd4_compound_state *cstate, stateid_t *stateid)
{
	if (cstate->minorversion) {
		memcpy(&cstate->current_stateid, stateid, sizeof(stateid_t));
		SET_STATE_ID(cstate, CURRENT_STATE_ID_FLAG);
	}
}

/* Invalidate the compound's saved current stateid. */
void
clear_current_stateid(struct nfsd4_compound_state *cstate)
{
	CLEAR_STATE_ID(cstate, CURRENT_STATE_ID_FLAG);
}

6441 6442 6443
/*
 * functions to set current state id
 */

/* OPEN_DOWNGRADE: record its result stateid as the current stateid. */
void
nfsd4_set_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *odp)
{
	put_stateid(cstate, &odp->od_stateid);
}

/* OPEN: record its result stateid as the current stateid. */
void
nfsd4_set_openstateid(struct nfsd4_compound_state *cstate, struct nfsd4_open *open)
{
	put_stateid(cstate, &open->op_stateid);
}

/* CLOSE: record its result stateid as the current stateid. */
void
nfsd4_set_closestateid(struct nfsd4_compound_state *cstate, struct nfsd4_close *close)
{
	put_stateid(cstate, &close->cl_stateid);
}

/* LOCK: record its result stateid as the current stateid. */
void
nfsd4_set_lockstateid(struct nfsd4_compound_state *cstate, struct nfsd4_lock *lock)
{
	put_stateid(cstate, &lock->lk_resp_stateid);
}

/*
 * functions to consume current state id
 */

/* OPEN_DOWNGRADE: substitute the saved current stateid if requested. */
void
nfsd4_get_opendowngradestateid(struct nfsd4_compound_state *cstate, struct nfsd4_open_downgrade *odp)
{
	get_stateid(cstate, &odp->od_stateid);
}

/* DELEGRETURN: substitute the saved current stateid if requested. */
void
nfsd4_get_delegreturnstateid(struct nfsd4_compound_state *cstate, struct nfsd4_delegreturn *drp)
{
	get_stateid(cstate, &drp->dr_stateid);
}

/* FREE_STATEID: substitute the saved current stateid if requested. */
void
nfsd4_get_freestateid(struct nfsd4_compound_state *cstate, struct nfsd4_free_stateid *fsp)
{
	get_stateid(cstate, &fsp->fr_stateid);
}

/* SETATTR: substitute the saved current stateid if requested. */
void
nfsd4_get_setattrstateid(struct nfsd4_compound_state *cstate, struct nfsd4_setattr *setattr)
{
	get_stateid(cstate, &setattr->sa_stateid);
}

6496 6497 6498 6499 6500 6501 6502
/* CLOSE: substitute the saved current stateid if requested. */
void
nfsd4_get_closestateid(struct nfsd4_compound_state *cstate, struct nfsd4_close *close)
{
	get_stateid(cstate, &close->cl_stateid);
}

/* LOCKU: substitute the saved current stateid if requested. */
void
nfsd4_get_lockustateid(struct nfsd4_compound_state *cstate, struct nfsd4_locku *locku)
{
	get_stateid(cstate, &locku->lu_stateid);
}

/* READ: substitute the saved current stateid if requested. */
void
nfsd4_get_readstateid(struct nfsd4_compound_state *cstate, struct nfsd4_read *read)
{
	get_stateid(cstate, &read->rd_stateid);
}

/* WRITE: substitute the saved current stateid if requested. */
void
nfsd4_get_writestateid(struct nfsd4_compound_state *cstate, struct nfsd4_write *write)
{
	get_stateid(cstate, &write->wr_stateid);
}