nfssvc.c 14.6 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11
/*
 * linux/fs/nfsd/nfssvc.c
 *
 * Central processing for nfsd.
 *
 * Authors:	Olaf Kirch (okir@monad.swb.de)
 *
 * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
 */

#include <linux/module.h>
A
Alexey Dobriyan 已提交
12
#include <linux/sched.h>
L
Linus Torvalds 已提交
13 14 15 16 17 18 19 20
#include <linux/time.h>
#include <linux/errno.h>
#include <linux/nfs.h>
#include <linux/in.h>
#include <linux/uio.h>
#include <linux/unistd.h>
#include <linux/slab.h>
#include <linux/smp.h>
21
#include <linux/freezer.h>
L
Linus Torvalds 已提交
22
#include <linux/fs_struct.h>
23
#include <linux/kthread.h>
A
Andy Adamson 已提交
24
#include <linux/swap.h>
L
Linus Torvalds 已提交
25 26 27 28 29 30 31 32 33

#include <linux/sunrpc/types.h>
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/cache.h>
#include <linux/nfsd/nfsd.h>
#include <linux/nfsd/stats.h>
#include <linux/nfsd/cache.h>
34
#include <linux/nfsd/syscall.h>
L
Linus Torvalds 已提交
35
#include <linux/lockd/bind.h>
36
#include <linux/nfsacl.h>
L
Linus Torvalds 已提交
37 38 39 40

#define NFSDDBG_FACILITY	NFSDDBG_SVC

extern struct svc_program	nfsd_program;
41
static int			nfsd(void *vrqstp);
L
Linus Torvalds 已提交
42 43
struct timeval			nfssvc_boot;

44 45 46 47 48 49 50 51 52 53 54 55 56
/*
 * nfsd_mutex protects nfsd_serv -- both the pointer itself and the members
 * of the svc_serv struct. In particular, ->sv_nrthreads but also to some
 * extent ->sv_temp_socks and ->sv_permsocks. It also protects nfsdstats.th_cnt
 *
 * If (outside the lock) nfsd_serv is non-NULL, then it must point to a
 * properly initialised 'struct svc_serv' with ->sv_nrthreads > 0. That number
 * of nfsd threads must exist and each must be listed in ->sp_all_threads in
 * each entry of ->sv_pools[].
 *
 * Transitions of the thread count between zero and non-zero are of particular
 * interest since the svc_serv needs to be created and initialized at that
 * point, or freed.
57 58 59 60 61 62 63 64
 *
 * Finally, the nfsd_mutex also protects some of the global variables that are
 * accessed when nfsd starts and that are settable via the write_* routines in
 * nfsctl.c. In particular:
 *
 *	user_recovery_dirname
 *	user_lease_time
 *	nfsd_versions
65 66 67 68
 */
DEFINE_MUTEX(nfsd_mutex);
struct svc_serv 		*nfsd_serv;

69 70 71 72 73 74 75
/*
 * nfsd_drc_lock protects nfsd_drc_max_mem and nfsd_drc_mem_used.
 * nfsd_drc_max_mem limits the total amount of memory available for
 * version 4.1 DRC caches.
 * nfsd_drc_mem_used tracks the current version 4.1 DRC memory usage.
 */
spinlock_t	nfsd_drc_lock;
76 77
unsigned int	nfsd_drc_max_mem;
unsigned int	nfsd_drc_mem_used;
78

79 80 81 82 83 84 85 86
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
static struct svc_stat	nfsd_acl_svcstats;
static struct svc_version *	nfsd_acl_version[] = {
	[2] = &nfsd_acl_version2,
	[3] = &nfsd_acl_version3,
};

#define NFSD_ACL_MINVERS            2
87
#define NFSD_ACL_NRVERS		ARRAY_SIZE(nfsd_acl_version)
88 89 90 91 92 93
static struct svc_version *nfsd_acl_versions[NFSD_ACL_NRVERS];

static struct svc_program	nfsd_acl_program = {
	.pg_prog		= NFS_ACL_PROGRAM,
	.pg_nvers		= NFSD_ACL_NRVERS,
	.pg_vers		= nfsd_acl_versions,
94
	.pg_name		= "nfsacl",
95 96 97 98 99 100 101 102 103 104
	.pg_class		= "nfsd",
	.pg_stats		= &nfsd_acl_svcstats,
	.pg_authenticate	= &svc_set_client,
};

static struct svc_stat	nfsd_acl_svcstats = {
	.program	= &nfsd_acl_program,
};
#endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */

105 106 107 108 109 110 111 112 113 114 115
static struct svc_version *	nfsd_version[] = {
	[2] = &nfsd_version2,
#if defined(CONFIG_NFSD_V3)
	[3] = &nfsd_version3,
#endif
#if defined(CONFIG_NFSD_V4)
	[4] = &nfsd_version4,
#endif
};

#define NFSD_MINVERS    	2
116
#define NFSD_NRVERS		ARRAY_SIZE(nfsd_version)
117 118 119
static struct svc_version *nfsd_versions[NFSD_NRVERS];

struct svc_program		nfsd_program = {
120 121 122
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
	.pg_next		= &nfsd_acl_program,
#endif
123 124 125 126 127 128 129 130 131 132
	.pg_prog		= NFS_PROGRAM,		/* program number */
	.pg_nvers		= NFSD_NRVERS,		/* nr of entries in nfsd_version */
	.pg_vers		= nfsd_versions,	/* version table */
	.pg_name		= "nfsd",		/* program name */
	.pg_class		= "nfsd",		/* authentication class */
	.pg_stats		= &nfsd_svcstats,	/* version table */
	.pg_authenticate	= &svc_set_client,	/* export authentication */

};

133 134
u32 nfsd_supported_minorversion;

135 136 137 138 139 140 141 142 143
int nfsd_vers(int vers, enum vers_op change)
{
	if (vers < NFSD_MINVERS || vers >= NFSD_NRVERS)
		return -1;
	switch(change) {
	case NFSD_SET:
		nfsd_versions[vers] = nfsd_version[vers];
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
		if (vers < NFSD_ACL_NRVERS)
144
			nfsd_acl_versions[vers] = nfsd_acl_version[vers];
145
#endif
146
		break;
147 148 149 150
	case NFSD_CLEAR:
		nfsd_versions[vers] = NULL;
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
		if (vers < NFSD_ACL_NRVERS)
151
			nfsd_acl_versions[vers] = NULL;
152 153 154 155 156 157 158 159 160
#endif
		break;
	case NFSD_TEST:
		return nfsd_versions[vers] != NULL;
	case NFSD_AVAIL:
		return nfsd_version[vers] != NULL;
	}
	return 0;
}
161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182

/*
 * Query or change the highest enabled NFSv4 minor version.
 *
 * @minorversion: minor version the operation refers to
 * @change:       NFSD_SET makes @minorversion the highest enabled one;
 *                NFSD_CLEAR lowers the limit to @minorversion - 1 (minor
 *                version 0 cannot be cleared); NFSD_TEST / NFSD_AVAIL
 *                report whether @minorversion is enabled / supported.
 *
 * Returns -1 for an unsupported minor version or an attempt to clear
 * minor version 0, the boolean answer for the queries, and 0 otherwise.
 */
int nfsd_minorversion(u32 minorversion, enum vers_op change)
{
	if (minorversion > NFSD_SUPPORTED_MINOR_VERSION)
		return -1;

	if (change == NFSD_TEST)
		return minorversion <= nfsd_supported_minorversion;

	if (change == NFSD_AVAIL)
		return minorversion <= NFSD_SUPPORTED_MINOR_VERSION;

	if (change == NFSD_SET) {
		nfsd_supported_minorversion = minorversion;
		return 0;
	}

	if (change == NFSD_CLEAR) {
		if (minorversion == 0)
			return -1;
		nfsd_supported_minorversion = minorversion - 1;
	}

	return 0;
}

L
Linus Torvalds 已提交
183 184 185 186 187 188 189
/*
 * Maximum number of nfsd processes
 */
#define	NFSD_MAXSERVS		8192

/*
 * Return ->sv_nrthreads of the running server, or 0 when there is no
 * server.  The value is sampled under nfsd_mutex.
 */
int nfsd_nrthreads(void)
{
	int count;

	mutex_lock(&nfsd_mutex);
	count = nfsd_serv ? nfsd_serv->sv_nrthreads : 0;
	mutex_unlock(&nfsd_mutex);

	return count;
}

198 199 200
static void nfsd_last_thread(struct svc_serv *serv)
{
	/* When last nfsd thread exits we need to do some clean-up */
201 202
	struct svc_xprt *xprt;
	list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list)
203
		lockd_down();
204 205 206 207
	nfsd_serv = NULL;
	nfsd_racache_shutdown();
	nfs4_state_shutdown();

208 209 210
	printk(KERN_WARNING "nfsd: last server has exited, flushing export "
			    "cache\n");
	nfsd_export_flush();
211
}
212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233

/*
 * If no NFS version at all is enabled in nfsd_program, enable every
 * compiled-in NFS (and NFSACL) version.  Does nothing when at least one
 * version is already enabled.
 */
void nfsd_reset_versions(void)
{
	int vers;

	/* any enabled version means the tables are left untouched */
	for (vers = NFSD_MINVERS; vers < NFSD_NRVERS; vers++) {
		if (nfsd_program.pg_vers[vers])
			return;
	}

	for (vers = NFSD_MINVERS; vers < NFSD_NRVERS; vers++)
		nfsd_program.pg_vers[vers] = nfsd_version[vers];

#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
	for (vers = NFSD_ACL_MINVERS; vers < NFSD_ACL_NRVERS; vers++)
		nfsd_acl_program.pg_vers[vers] = nfsd_acl_version[vers];
#endif
}

A
Andy Adamson 已提交
234 235 236 237 238 239 240 241 242 243 244 245 246 247
/*
 * Each session guarantees a negotiated per slot memory cache for replies
 * which in turn consumes memory beyond the v2/v3/v4.0 server. A dedicated
 * NFSv4.1 server might want to use more memory for a DRC than a machine
 * with multiple services.
 *
 * Impose a hard limit on the amount of memory for the DRC which varies
 * according to the machine's free pages. This is of course only a default.
 *
 * For now this is a #defined shift which could be under admin control
 * in the future.
 */
static void set_max_drc(void)
{
248
	#define NFSD_DRC_SIZE_SHIFT	10
249 250 251
	nfsd_drc_max_mem = (nr_free_buffer_pages()
					>> NFSD_DRC_SIZE_SHIFT) * PAGE_SIZE;
	nfsd_drc_mem_used = 0;
252
	spin_lock_init(&nfsd_drc_lock);
253
	dprintk("%s nfsd_drc_max_mem %u \n", __func__, nfsd_drc_max_mem);
A
Andy Adamson 已提交
254
}
255

256
int nfsd_create_serv(void)
257 258
{
	int err = 0;
259 260

	WARN_ON(!mutex_is_locked(&nfsd_mutex));
261
	if (nfsd_serv) {
G
Greg Banks 已提交
262
		svc_get(nfsd_serv);
263 264
		return 0;
	}
265 266 267 268 269 270 271 272 273 274 275
	if (nfsd_max_blksize == 0) {
		/* choose a suitable default */
		struct sysinfo i;
		si_meminfo(&i);
		/* Aim for 1/4096 of memory per thread
		 * This gives 1MB on 4Gig machines
		 * But only uses 32K on 128M machines.
		 * Bottom out at 8K on 32M and smaller.
		 * Of course, this is only a default.
		 */
		nfsd_max_blksize = NFSSVC_MAXBLKSIZE;
276
		i.totalram <<= PAGE_SHIFT - 12;
277 278 279 280
		while (nfsd_max_blksize > i.totalram &&
		       nfsd_max_blksize >= 8*1024*2)
			nfsd_max_blksize /= 2;
	}
281

282
	nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize,
283
				      nfsd_last_thread, nfsd, THIS_MODULE);
284 285
	if (nfsd_serv == NULL)
		err = -ENOMEM;
A
Andy Adamson 已提交
286 287
	else
		set_max_drc();
288

289 290 291 292 293 294 295 296 297 298
	do_gettimeofday(&nfssvc_boot);		/* record boot time */
	return err;
}

static int nfsd_init_socks(int port)
{
	int error;
	if (!list_empty(&nfsd_serv->sv_permsocks))
		return 0;

299
	error = svc_create_xprt(nfsd_serv, "udp", PF_INET, port,
300
					SVC_SOCK_DEFAULTS);
301 302 303
	if (error < 0)
		return error;

304 305 306 307
	error = lockd_up();
	if (error < 0)
		return error;

308
	error = svc_create_xprt(nfsd_serv, "tcp", PF_INET, port,
309
					SVC_SOCK_DEFAULTS);
310 311
	if (error < 0)
		return error;
312 313 314 315 316

	error = lockd_up();
	if (error < 0)
		return error;

317 318 319
	return 0;
}

320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345
/* Number of pools in the running server, or 0 if there is no server. */
int nfsd_nrpools(void)
{
	return nfsd_serv ? nfsd_serv->sv_nrpools : 0;
}

/*
 * Copy the per-pool thread counts into @nthreads.  At most @n entries are
 * written, and never more than the server has pools; nothing is written
 * when no server exists.  Always returns 0.
 */
int nfsd_get_nrthreads(int n, int *nthreads)
{
	int pool;

	if (nfsd_serv == NULL)
		return 0;

	for (pool = 0; pool < nfsd_serv->sv_nrpools && pool < n; pool++)
		nthreads[pool] = nfsd_serv->sv_pools[pool].sp_nrthreads;

	return 0;
}

int nfsd_set_nrthreads(int n, int *nthreads)
{
	int i = 0;
	int tot = 0;
	int err = 0;

346 347
	WARN_ON(!mutex_is_locked(&nfsd_mutex));

348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393
	if (nfsd_serv == NULL || n <= 0)
		return 0;

	if (n > nfsd_serv->sv_nrpools)
		n = nfsd_serv->sv_nrpools;

	/* enforce a global maximum number of threads */
	tot = 0;
	for (i = 0; i < n; i++) {
		if (nthreads[i] > NFSD_MAXSERVS)
			nthreads[i] = NFSD_MAXSERVS;
		tot += nthreads[i];
	}
	if (tot > NFSD_MAXSERVS) {
		/* total too large: scale down requested numbers */
		for (i = 0; i < n && tot > 0; i++) {
		    	int new = nthreads[i] * NFSD_MAXSERVS / tot;
			tot -= (nthreads[i] - new);
			nthreads[i] = new;
		}
		for (i = 0; i < n && tot > 0; i++) {
			nthreads[i]--;
			tot--;
		}
	}

	/*
	 * There must always be a thread in pool 0; the admin
	 * can't shut down NFS completely using pool_threads.
	 */
	if (nthreads[0] == 0)
		nthreads[0] = 1;

	/* apply the new numbers */
	svc_get(nfsd_serv);
	for (i = 0; i < n; i++) {
		err = svc_set_num_threads(nfsd_serv, &nfsd_serv->sv_pools[i],
				    	  nthreads[i]);
		if (err)
			break;
	}
	svc_destroy(nfsd_serv);

	return err;
}

L
Linus Torvalds 已提交
394 395 396 397
int
nfsd_svc(unsigned short port, int nrservs)
{
	int	error;
398 399

	mutex_lock(&nfsd_mutex);
400
	dprintk("nfsd: creating service\n");
L
Linus Torvalds 已提交
401 402 403 404
	if (nrservs <= 0)
		nrservs = 0;
	if (nrservs > NFSD_MAXSERVS)
		nrservs = NFSD_MAXSERVS;
405 406 407 408
	error = 0;
	if (nrservs == 0 && nfsd_serv == NULL)
		goto out;

L
Linus Torvalds 已提交
409 410 411 412
	/* Readahead param cache - will no-op if it already exists */
	error =	nfsd_racache_init(2*nrservs);
	if (error<0)
		goto out;
413
	nfs4_state_start();
414 415 416 417 418 419 420 421 422 423 424

	nfsd_reset_versions();

	error = nfsd_create_serv();

	if (error)
		goto out;
	error = nfsd_init_socks(port);
	if (error)
		goto failure;

425
	error = svc_set_num_threads(nfsd_serv, NULL, nrservs);
426 427 428 429 430 431
	if (error == 0)
		/* We are holding a reference to nfsd_serv which
		 * we don't want to count in the return value,
		 * so subtract 1
		 */
		error = nfsd_serv->sv_nrthreads - 1;
L
Linus Torvalds 已提交
432 433 434
 failure:
	svc_destroy(nfsd_serv);		/* Release server */
 out:
435
	mutex_unlock(&nfsd_mutex);
L
Linus Torvalds 已提交
436 437 438 439 440 441 442
	return error;
}


/*
 * This is the NFS server kernel thread
 */
443 444
static int
nfsd(void *vrqstp)
L
Linus Torvalds 已提交
445
{
446 447
	struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp;
	int err, preverr = 0;
L
Linus Torvalds 已提交
448 449

	/* Lock module and set up kernel thread */
450
	mutex_lock(&nfsd_mutex);
L
Linus Torvalds 已提交
451

452
	/* At this point, the thread shares current->fs
L
Linus Torvalds 已提交
453 454
	 * with the init process. We need to create files with a
	 * umask of 0 instead of init's umask. */
455
	if (unshare_fs_struct() < 0) {
L
Linus Torvalds 已提交
456 457 458
		printk("Unable to start nfsd thread: out of memory\n");
		goto out;
	}
459

L
Linus Torvalds 已提交
460 461
	current->fs->umask = 0;

462 463
	/*
	 * thread is spawned with all signals set to SIG_IGN, re-enable
464
	 * the ones that will bring down the thread
465
	 */
466 467 468 469
	allow_signal(SIGKILL);
	allow_signal(SIGHUP);
	allow_signal(SIGINT);
	allow_signal(SIGQUIT);
470

L
Linus Torvalds 已提交
471
	nfsdstats.th_cnt++;
472 473
	mutex_unlock(&nfsd_mutex);

L
Linus Torvalds 已提交
474 475 476 477 478 479
	/*
	 * We want less throttling in balance_dirty_pages() so that nfs to
	 * localhost doesn't cause nfsd to lock up due to all the client's
	 * dirty pages.
	 */
	current->flags |= PF_LESS_THROTTLE;
480
	set_freezable();
L
Linus Torvalds 已提交
481 482 483 484 485 486 487 488 489

	/*
	 * The main request loop
	 */
	for (;;) {
		/*
		 * Find a socket with data available and call its
		 * recvfrom routine.
		 */
490
		while ((err = svc_recv(rqstp, 60*60*HZ)) == -EAGAIN)
L
Linus Torvalds 已提交
491
			;
492
		if (err == -EINTR)
L
Linus Torvalds 已提交
493
			break;
494 495 496 497 498 499 500 501 502 503
		else if (err < 0) {
			if (err != preverr) {
				printk(KERN_WARNING "%s: unexpected error "
					"from svc_recv (%d)\n", __func__, -err);
				preverr = err;
			}
			schedule_timeout_uninterruptible(HZ);
			continue;
		}

L
Linus Torvalds 已提交
504 505 506 507

		/* Lock the export hash tables for reading. */
		exp_readlock();

508
		svc_process(rqstp);
L
Linus Torvalds 已提交
509 510 511 512 513

		/* Unlock export hash tables */
		exp_readunlock();
	}

514
	/* Clear signals before calling svc_exit_thread() */
515
	flush_signals(current);
L
Linus Torvalds 已提交
516

517
	mutex_lock(&nfsd_mutex);
L
Linus Torvalds 已提交
518 519 520 521 522 523 524
	nfsdstats.th_cnt --;

out:
	/* Release the thread */
	svc_exit_thread(rqstp);

	/* Release module */
525
	mutex_unlock(&nfsd_mutex);
L
Linus Torvalds 已提交
526
	module_put_and_exit(0);
527
	return 0;
L
Linus Torvalds 已提交
528 529
}

530 531 532 533 534 535 536 537 538
/*
 * Translate status codes for protocol versions that cannot express them:
 * nfserr_jukebox becomes nfserr_dropit for version 2, and nfserr_wrongsec
 * becomes nfserr_acces for versions below 4.  Anything else is returned
 * unchanged.
 */
static __be32 map_new_errors(u32 vers, __be32 nfserr)
{
	if (vers == 2 && nfserr == nfserr_jukebox)
		return nfserr_dropit;

	if (vers < 4 && nfserr == nfserr_wrongsec)
		return nfserr_acces;

	return nfserr;
}

L
Linus Torvalds 已提交
539
int
540
nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
L
Linus Torvalds 已提交
541 542 543
{
	struct svc_procedure	*proc;
	kxdrproc_t		xdr;
A
Al Viro 已提交
544 545
	__be32			nfserr;
	__be32			*nfserrp;
L
Linus Torvalds 已提交
546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563

	dprintk("nfsd_dispatch: vers %d proc %d\n",
				rqstp->rq_vers, rqstp->rq_proc);
	proc = rqstp->rq_procinfo;

	/* Check whether we have this call in the cache. */
	switch (nfsd_cache_lookup(rqstp, proc->pc_cachetype)) {
	case RC_INTR:
	case RC_DROPIT:
		return 0;
	case RC_REPLY:
		return 1;
	case RC_DOIT:;
		/* do it */
	}

	/* Decode arguments */
	xdr = proc->pc_decode;
A
Al Viro 已提交
564
	if (xdr && !xdr(rqstp, (__be32*)rqstp->rq_arg.head[0].iov_base,
L
Linus Torvalds 已提交
565 566 567 568 569 570 571 572 573 574 575 576
			rqstp->rq_argp)) {
		dprintk("nfsd: failed to decode arguments!\n");
		nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
		*statp = rpc_garbage_args;
		return 1;
	}

	/* need to grab the location to store the status, as
	 * nfsv4 does some encoding while processing 
	 */
	nfserrp = rqstp->rq_res.head[0].iov_base
		+ rqstp->rq_res.head[0].iov_len;
A
Al Viro 已提交
577
	rqstp->rq_res.head[0].iov_len += sizeof(__be32);
L
Linus Torvalds 已提交
578

579 580 581 582
	/* NFSv4.1 DRC requires statp */
	if (rqstp->rq_vers == 4)
		nfsd4_set_statp(rqstp, statp);

L
Linus Torvalds 已提交
583 584
	/* Now call the procedure handler, and encode NFS status. */
	nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp);
585
	nfserr = map_new_errors(rqstp->rq_vers, nfserr);
L
Linus Torvalds 已提交
586
	if (nfserr == nfserr_dropit) {
587
		dprintk("nfsd: Dropping request; may be revisited later\n");
L
Linus Torvalds 已提交
588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613
		nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
		return 0;
	}

	if (rqstp->rq_proc != 0)
		*nfserrp++ = nfserr;

	/* Encode result.
	 * For NFSv2, additional info is never returned in case of an error.
	 */
	if (!(nfserr && rqstp->rq_vers == 2)) {
		xdr = proc->pc_encode;
		if (xdr && !xdr(rqstp, nfserrp,
				rqstp->rq_resp)) {
			/* Failed to encode result. Release cache entry */
			dprintk("nfsd: failed to encode result!\n");
			nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
			*statp = rpc_system_err;
			return 1;
		}
	}

	/* Store reply in cache. */
	nfsd_cache_update(rqstp, proc->pc_cachetype, statp + 1);
	return 1;
}
614 615 616 617 618 619 620

/*
 * Open handler for the pool statistics file: hands @file to
 * svc_pool_stats_open() for the running server, or fails with -ENODEV
 * when no server exists.
 */
int nfsd_pool_stats_open(struct inode *inode, struct file *file)
{
	return nfsd_serv ? svc_pool_stats_open(nfsd_serv, file) : -ENODEV;
}