svc.h 11.6 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
/*
 * linux/include/linux/sunrpc/svc.h
 *
 * RPC server declarations.
 *
 * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
 */


#ifndef SUNRPC_SVC_H
#define SUNRPC_SVC_H

#include <linux/in.h>
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/xdr.h>
16
#include <linux/sunrpc/auth.h>
L
Linus Torvalds 已提交
17 18 19 20
#include <linux/sunrpc/svcauth.h>
#include <linux/wait.h>
#include <linux/mm.h>

21 22 23 24
/*
 * Prototype of the function executed by an RPC server thread.
 * It takes a pointer to a struct svc_rqst and returns nothing.
 */
typedef void (*svc_thread_fn)(struct svc_rqst *);
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41

/*
 *
 * RPC service thread pool.
 *
 * Pool of threads and temporary sockets.  Generally there is only
 * a single one of these per RPC service, but on NUMA machines those
 * services that can benefit from it (i.e. nfs but not lockd) will
 * have one pool per NUMA node.  This optimisation reduces cross-
 * node traffic on multi-node NUMA NFS servers.
 */
struct svc_pool {
	unsigned int		sp_id;	    	/* pool id; also node id on NUMA */
	spinlock_t		sp_lock;	/* protects all fields */
	struct list_head	sp_threads;	/* idle server threads */
	struct list_head	sp_sockets;	/* pending sockets */
	unsigned int		sp_nrthreads;	/* # of threads in pool */
42
	struct list_head	sp_all_threads;	/* all server threads */
43 44
} ____cacheline_aligned_in_smp;

L
Linus Torvalds 已提交
45 46 47 48 49 50 51 52 53 54 55 56 57 58 59
/*
 * RPC service.
 *
 * An RPC service is a ``daemon,'' possibly multithreaded, which
 * receives and processes incoming RPC messages.
 * It has one or more transport sockets associated with it, and maintains
 * a list of idle threads waiting for input.
 *
 * We currently do not support more than one RPC program per daemon.
 */
struct svc_serv {
	struct svc_program *	sv_program;	/* RPC program */
	struct svc_stat *	sv_stats;	/* RPC statistics */
	spinlock_t		sv_lock;
	unsigned int		sv_nrthreads;	/* # of server threads */
60 61
	unsigned int		sv_max_payload;	/* datagram payload size */
	unsigned int		sv_max_mesg;	/* max_payload + 1 page for overheads */
L
Linus Torvalds 已提交
62 63 64 65 66
	unsigned int		sv_xdrsize;	/* XDR buffer size */

	struct list_head	sv_permsocks;	/* all permanent sockets */
	struct list_head	sv_tempsocks;	/* all temporary sockets */
	int			sv_tmpcnt;	/* count of temporary sockets */
67
	struct timer_list	sv_temptimer;	/* timer for aging temporary sockets */
L
Linus Torvalds 已提交
68 69

	char *			sv_name;	/* service name */
70

71 72 73
	unsigned int		sv_nrpools;	/* number of thread pools */
	struct svc_pool *	sv_pools;	/* array of thread pools */

74 75 76 77
	void			(*sv_shutdown)(struct svc_serv *serv);
						/* Callback to use when last thread
						 * exits.
						 */
78 79 80 81 82

	struct module *		sv_module;	/* optional module to count when
						 * adding threads */
	svc_thread_fn		sv_function;	/* main function for threads */
	int			sv_kill_signal;	/* signal to kill threads */
L
Linus Torvalds 已提交
83 84
};

G
Greg Banks 已提交
85 86 87 88 89 90 91 92 93 94 95
/*
 * sv_nrthreads doubles as the reference count on the service.
 * svc_destroy() drops that count, so it has to be raised around any
 * operation that changes the number of threads.  Horrible, but there
 * it is.
 * Should be called with the BKL held.
 */
static inline void svc_get(struct svc_serv *serv)
{
	serv->sv_nrthreads += 1;
}

L
Linus Torvalds 已提交
96 97 98 99
/*
 * Maximum payload size supported by a kernel RPC server.
 * This is used to determine the max number of pages nfsd is
 * willing to return in a single READ operation.
 *
 * These happen to all be powers of 2, which is not strictly
 * necessary but helps enforce the real limitation, which is
 * that they should be multiples of PAGE_CACHE_SIZE.
 *
 * For UDP transports, a block plus NFS, RPC, and UDP headers
 * has to fit into the IP datagram limit of 64K.  The largest
 * feasible number for all known page sizes is probably 48K,
 * but we choose 32K here.  This is the same as the historical
 * Linux limit; someone who cares more about NFS/UDP performance
 * can test a larger number.
 *
 * For TCP transports we have more freedom.  A size of 1MB is
 * chosen to match the client limit.  Other OSes are known to
 * have larger limits, but those numbers are probably beyond
 * the point of diminishing returns.
 */
#define RPCSVC_MAXPAYLOAD	(1*1024*1024u)
#define RPCSVC_MAXPAYLOAD_TCP	RPCSVC_MAXPAYLOAD
#define RPCSVC_MAXPAYLOAD_UDP	(32*1024u)

/*
 * Defined out of line.
 * NOTE(review): presumably reports the payload limit that applies to
 * the given request - confirm against the definition.
 */
u32 svc_max_payload(const struct svc_rqst *rqstp);
L
Linus Torvalds 已提交
122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149

/*
 * RPC requests and replies are stored in one or more pages.
 * Each server thread owns an array of pages.  An incoming request is
 * copied into these pages as it arrives; whatever pages remain are
 * available for writing the reply.
 *
 * Pages are sent using ->sendpage, so each server thread has to
 * allocate new pages to replace the ones consumed by sending.  To help
 * keep track of these pages there is a receive list, where all pages
 * initially live, and a send list, to which pages are moved when they
 * are to be part of a reply.
 *
 * xdr_buf is used for holding responses because it fits well with NFS
 * read responses (a header, some data pages, and possibly a tail) and
 * lets us share some client side routines.
 *
 * The xdr_buf.head kvec always points to the first page in the rq_*pages
 * list.  The xdr_buf.pages pointer points to the second page on that
 * list.  xdr_buf.tail points to the end of the first page.
 * This assumes that the non-page part of an rpc reply will fit
 * in a page - NFSd ensures this.  lockd also has no trouble.
 *
 * Each request/reply pair can have at most one "payload", plus two pages:
 * one for the request, and one for the reply.
 */
#define RPCSVC_MAXPAGES		(2 + (RPCSVC_MAXPAYLOAD + PAGE_SIZE - 1) / PAGE_SIZE)

A
Alexey Dobriyan 已提交
150
static inline u32 svc_getnl(struct kvec *iov)
L
Linus Torvalds 已提交
151
{
A
Alexey Dobriyan 已提交
152
	__be32 val, *vp;
L
Linus Torvalds 已提交
153 154 155
	vp = iov->iov_base;
	val = *vp++;
	iov->iov_base = (void*)vp;
A
Alexey Dobriyan 已提交
156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173
	iov->iov_len -= sizeof(__be32);
	return ntohl(val);
}

/*
 * Store @val, converted to network byte order, at the current end of
 * @iov (iov_base + iov_len) and grow iov_len by one 32-bit word.
 */
static inline void svc_putnl(struct kvec *iov, u32 val)
{
	__be32 *tail = (__be32 *)((char *)iov->iov_base + iov->iov_len);

	*tail = htonl(val);
	iov->iov_len += sizeof(*tail);
}

/*
 * Consume one 32-bit word from the front of @iov and return it without
 * any byte-order conversion.  iov_base is advanced past the word and
 * iov_len is reduced by its size.  No bounds check is performed here.
 */
static inline __be32 svc_getu32(struct kvec *iov)
{
	__be32 val, *vp;

	vp = iov->iov_base;
	val = *vp++;
	iov->iov_base = (void *)vp;
	iov->iov_len -= sizeof(__be32);
	return val;
}

static inline void svc_ungetu32(struct kvec *iov)
{
A
Alexey Dobriyan 已提交
179
	__be32 *vp = (__be32 *)iov->iov_base;
L
Linus Torvalds 已提交
180 181 182 183
	iov->iov_base = (void *)(vp - 1);
	iov->iov_len += sizeof(*vp);
}

A
Alexey Dobriyan 已提交
184
/*
 * Store @val unchanged (no byte-order conversion) at the current end of
 * @iov (iov_base + iov_len) and grow iov_len by one 32-bit word.
 */
static inline void svc_putu32(struct kvec *iov, __be32 val)
{
	/* Byte-wise offset via char * rather than arithmetic on void *. */
	__be32 *vp = (__be32 *)((char *)iov->iov_base + iov->iov_len);

	*vp = val;
	iov->iov_len += sizeof(__be32);
}

	
/*
 * The context of a single thread, including the request currently being
 * processed.
 */
struct svc_rqst {
	struct list_head	rq_list;	/* idle list */
198
	struct list_head	rq_all;		/* all threads list */
L
Linus Torvalds 已提交
199 200 201 202 203
	struct svc_sock *	rq_sock;	/* socket */
	struct sockaddr_in	rq_addr;	/* peer address */
	int			rq_addrlen;

	struct svc_serv *	rq_server;	/* RPC service definition */
204
	struct svc_pool *	rq_pool;	/* thread pool */
L
Linus Torvalds 已提交
205 206 207 208 209 210 211 212
	struct svc_procedure *	rq_procinfo;	/* procedure info */
	struct auth_ops *	rq_authop;	/* authentication flavour */
	struct svc_cred		rq_cred;	/* auth info */
	struct sk_buff *	rq_skbuff;	/* fast recv inet buffer */
	struct svc_deferred_req*rq_deferred;	/* deferred request we are replaying */

	struct xdr_buf		rq_arg;
	struct xdr_buf		rq_res;
213 214 215
	struct page *		rq_pages[RPCSVC_MAXPAGES];
	struct page *		*rq_respages;	/* points into rq_pages */
	int			rq_resused;	/* number of pages used for result */
L
Linus Torvalds 已提交
216

217 218
	struct kvec		rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */

219
	__be32			rq_xid;		/* transmission id */
L
Linus Torvalds 已提交
220 221 222 223 224 225 226 227
	u32			rq_prog;	/* program number */
	u32			rq_vers;	/* program version */
	u32			rq_proc;	/* procedure number */
	u32			rq_prot;	/* IP protocol */
	unsigned short
				rq_secure  : 1;	/* secure port */


228
	__be32			rq_daddr;	/* dest addr of request - reply from here */
L
Linus Torvalds 已提交
229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247

	void *			rq_argp;	/* decoded arguments */
	void *			rq_resp;	/* xdr'd results */
	void *			rq_auth_data;	/* flavor-specific data */

	int			rq_reserved;	/* space on socket outq
						 * reserved for this request
						 */

	struct cache_req	rq_chandle;	/* handle passed to caches for 
						 * request delaying 
						 */
	/* Catering to nfsd */
	struct auth_domain *	rq_client;	/* RPC peer info */
	struct svc_cacherep *	rq_cacherep;	/* cache info */
	struct knfsd_fh *	rq_reffh;	/* Referrence filehandle, used to
						 * determine what device number
						 * to report (real or virtual)
						 */
248 249 250
	int			rq_sendfile_ok; /* turned off in gss privacy
						 * to prevent encrypting page
						 * cache pages */
L
Linus Torvalds 已提交
251
	wait_queue_head_t	rq_wait;	/* synchronization */
252
	struct task_struct	*rq_task;	/* service thread */
L
Linus Torvalds 已提交
253 254 255 256 257 258
};

/*
 * Check buffer bounds after decoding arguments
 */
static inline int
259
xdr_argsize_check(struct svc_rqst *rqstp, __be32 *p)
L
Linus Torvalds 已提交
260 261 262
{
	char *cp = (char *)p;
	struct kvec *vec = &rqstp->rq_arg.head[0];
263 264
	return cp >= (char*)vec->iov_base
		&& cp <= (char*)vec->iov_base + vec->iov_len;
L
Linus Torvalds 已提交
265 266 267
}

static inline int
268
xdr_ressize_check(struct svc_rqst *rqstp, __be32 *p)
L
Linus Torvalds 已提交
269 270 271 272 273 274 275 276 277
{
	struct kvec *vec = &rqstp->rq_res.head[0];
	char *cp = (char*)p;

	vec->iov_len = cp - (char*)vec->iov_base;

	return vec->iov_len <= PAGE_SIZE;
}

278
static inline void svc_free_res_pages(struct svc_rqst *rqstp)
279
{
280 281 282 283 284 285
	while (rqstp->rq_resused) {
		struct page **pp = (rqstp->rq_respages +
				    --rqstp->rq_resused);
		if (*pp) {
			put_page(*pp);
			*pp = NULL;
L
Linus Torvalds 已提交
286 287 288 289 290 291 292 293
		}
	}
}

struct svc_deferred_req {
	u32			prot;	/* protocol (UDP or TCP) */
	struct sockaddr_in	addr;
	struct svc_sock		*svsk;	/* where reply must go */
294
	__be32			daddr;	/* where reply must come from */
L
Linus Torvalds 已提交
295 296
	struct cache_deferred_req handle;
	int			argslen;
297
	__be32			args[0];
L
Linus Torvalds 已提交
298 299 300
};

/*
 * List of RPC programs on the same transport endpoint
 */
struct svc_program {
304
	struct svc_program *	pg_next;	/* other programs (same xprt) */
L
Linus Torvalds 已提交
305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324
	u32			pg_prog;	/* program number */
	unsigned int		pg_lovers;	/* lowest version */
	unsigned int		pg_hivers;	/* lowest version */
	unsigned int		pg_nvers;	/* number of versions */
	struct svc_version **	pg_vers;	/* version array */
	char *			pg_name;	/* service name */
	char *			pg_class;	/* class name: services sharing authentication */
	struct svc_stat *	pg_stats;	/* rpc statistics */
	int			(*pg_authenticate)(struct svc_rqst *);
};

/*
 * RPC program version
 */
struct svc_version {
	u32			vs_vers;	/* version number */
	u32			vs_nproc;	/* number of procedures */
	struct svc_procedure *	vs_proc;	/* per-procedure info */
	u32			vs_xdrsize;	/* xdrsize needed for this version */

325 326 327
	unsigned int		vs_hidden : 1;	/* Don't register with portmapper.
						 * Only used for nfsacl so far. */

L
Linus Torvalds 已提交
328 329 330 331
	/* Override dispatch function (e.g. when caching replies).
	 * A return value of 0 means drop the request. 
	 * vs_dispatch == NULL means use default dispatcher.
	 */
332
	int			(*vs_dispatch)(struct svc_rqst *, __be32 *);
L
Linus Torvalds 已提交
333 334 335 336 337
};

/*
 * RPC procedure info
 */
A
Al Viro 已提交
338
typedef __be32	(*svc_procfunc)(struct svc_rqst *, void *argp, void *resp);
L
Linus Torvalds 已提交
339 340 341 342 343 344 345 346 347 348 349 350 351 352 353
struct svc_procedure {
	svc_procfunc		pc_func;	/* process the request */
	kxdrproc_t		pc_decode;	/* XDR decode args */
	kxdrproc_t		pc_encode;	/* XDR encode result */
	kxdrproc_t		pc_release;	/* XDR free result */
	unsigned int		pc_argsize;	/* argument struct size */
	unsigned int		pc_ressize;	/* result struct size */
	unsigned int		pc_count;	/* call count */
	unsigned int		pc_cachetype;	/* cache info (NFS) */
	unsigned int		pc_xdrressize;	/* maximum size of XDR reply */
};

/*
 * Function prototypes.
 */
354 355
struct svc_serv *  svc_create(struct svc_program *, unsigned int,
			      void (*shutdown)(struct svc_serv*));
L
Linus Torvalds 已提交
356 357
int		   svc_create_thread(svc_thread_fn, struct svc_serv *);
void		   svc_exit_thread(struct svc_rqst *);
358 359 360 361
struct svc_serv *  svc_create_pooled(struct svc_program *, unsigned int,
			void (*shutdown)(struct svc_serv*),
			svc_thread_fn, int sig, struct module *);
int		   svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
L
Linus Torvalds 已提交
362
void		   svc_destroy(struct svc_serv *);
363
int		   svc_process(struct svc_rqst *);
L
Linus Torvalds 已提交
364 365 366
int		   svc_register(struct svc_serv *, int, unsigned short);
void		   svc_wake_up(struct svc_serv *);
void		   svc_reserve(struct svc_rqst *rqstp, int space);
367
struct svc_pool *  svc_pool_for_cpu(struct svc_serv *serv, int cpu);
L
Linus Torvalds 已提交
368 369

#endif /* SUNRPC_SVC_H */