read.c 17.9 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
/*
 * linux/fs/nfs/read.c
 *
 * Block I/O for NFS
 *
 * Partial copy of Linus' read cache modifications to fs/nfs/file.c
 * modified for async RPC by okir@monad.swb.de
 */

#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
A
Andy Adamson 已提交
21
#include <linux/module.h>
L
Linus Torvalds 已提交
22 23

#include <asm/system.h>
24
#include "pnfs.h"
L
Linus Torvalds 已提交
25

26
#include "nfs4_fs.h"
27
#include "internal.h"
C
Chuck Lever 已提交
28
#include "iostat.h"
29
#include "fscache.h"
C
Chuck Lever 已提交
30

L
Linus Torvalds 已提交
31 32
#define NFSDBG_FACILITY		NFSDBG_PAGECACHE

33
static const struct nfs_pageio_ops nfs_pageio_read_ops;
T
Trond Myklebust 已提交
34 35
static const struct rpc_call_ops nfs_read_partial_ops;
static const struct rpc_call_ops nfs_read_full_ops;
L
Linus Torvalds 已提交
36

37
static struct kmem_cache *nfs_rdata_cachep;
38
static mempool_t *nfs_rdata_mempool;
L
Linus Torvalds 已提交
39 40 41

#define MIN_POOL_READ	(32)

42
struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
43
{
44
	struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_KERNEL);
45 46 47 48

	if (p) {
		memset(p, 0, sizeof(*p));
		INIT_LIST_HEAD(&p->pages);
49
		p->npages = pagecount;
50 51
		if (pagecount <= ARRAY_SIZE(p->page_array))
			p->pagevec = p->page_array;
52
		else {
53
			p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
54
			if (!p->pagevec) {
55 56 57 58 59 60 61 62
				mempool_free(p, nfs_rdata_mempool);
				p = NULL;
			}
		}
	}
	return p;
}

T
Trond Myklebust 已提交
63
void nfs_readdata_free(struct nfs_read_data *p)
64 65 66 67 68 69
{
	if (p && (p->pagevec != &p->page_array[0]))
		kfree(p->pagevec);
	mempool_free(p, nfs_rdata_mempool);
}

T
Trond Myklebust 已提交
70
static void nfs_readdata_release(struct nfs_read_data *rdata)
L
Linus Torvalds 已提交
71
{
72
	put_lseg(rdata->lseg);
73 74
	put_nfs_open_context(rdata->args.context);
	nfs_readdata_free(rdata);
L
Linus Torvalds 已提交
75 76 77 78 79
}

static
int nfs_return_empty_page(struct page *page)
{
80
	zero_user(page, 0, PAGE_CACHE_SIZE);
L
Linus Torvalds 已提交
81 82 83 84 85
	SetPageUptodate(page);
	unlock_page(page);
	return 0;
}

86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101
/*
 * After a read that hit end-of-file, zero the part of the requested range
 * that the server did not fill (args.count - res.count bytes starting right
 * after the returned data).  Nothing is done unless res.eof is set and
 * some bytes are actually missing.
 */
static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
{
	unsigned int remainder = data->args.count - data->res.count;
	unsigned int base = data->args.pgbase + data->res.count;
	unsigned int pglen;
	struct page **pages;

	if (data->res.eof == 0 || remainder == 0)
		return;
	/*
	 * Note: "remainder" can never be negative, since we check for
	 * 	this in the XDR code.
	 */
	pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
	base &= ~PAGE_CACHE_MASK;
	pglen = PAGE_CACHE_SIZE - base;

	/* Zero to the end of each page until the tail fits in one page. */
	while (remainder > pglen) {
		zero_user(*pages, base, pglen);
		pages++;
		remainder -= pglen;
		pglen = PAGE_CACHE_SIZE;
		base = 0;
	}
	zero_user(*pages, base, remainder);
}

115
void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
116 117 118 119 120
		struct inode *inode)
{
	nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops,
			NFS_SERVER(inode)->rsize, 0);
}
121
EXPORT_SYMBOL_GPL(nfs_pageio_init_read_mds);
122 123 124 125 126 127 128 129

/*
 * Initialise a read descriptor: let pNFS set it up if it will, otherwise
 * fall back to the generic nfs_pageio_init_read_mds() setup.
 */
static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
		struct inode *inode)
{
	if (pnfs_pageio_init_read(pgio, inode))
		return;
	nfs_pageio_init_read_mds(pgio, inode);
}

130 131
int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
		       struct page *page)
L
Linus Torvalds 已提交
132 133 134
{
	struct nfs_page	*new;
	unsigned int len;
F
Fred Isaman 已提交
135
	struct nfs_pageio_descriptor pgio;
L
Linus Torvalds 已提交
136

137
	len = nfs_page_length(page);
L
Linus Torvalds 已提交
138 139 140 141 142 143 144 145
	if (len == 0)
		return nfs_return_empty_page(page);
	new = nfs_create_request(ctx, inode, page, 0, len);
	if (IS_ERR(new)) {
		unlock_page(page);
		return PTR_ERR(new);
	}
	if (len < PAGE_CACHE_SIZE)
146
		zero_user_segment(page, len, PAGE_CACHE_SIZE);
L
Linus Torvalds 已提交
147

148
	nfs_pageio_init_read(&pgio, inode);
149
	nfs_pageio_add_request(&pgio, new);
150
	nfs_pageio_complete(&pgio);
L
Linus Torvalds 已提交
151 152 153 154 155
	return 0;
}

static void nfs_readpage_release(struct nfs_page *req)
{
156 157 158 159 160
	struct inode *d_inode = req->wb_context->path.dentry->d_inode;

	if (PageUptodate(req->wb_page))
		nfs_readpage_to_fscache(d_inode, req->wb_page, 0);

L
Linus Torvalds 已提交
161 162 163
	unlock_page(req->wb_page);

	dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
164 165
			req->wb_context->path.dentry->d_inode->i_sb->s_id,
			(long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
L
Linus Torvalds 已提交
166 167
			req->wb_bytes,
			(long long)req_offset(req));
168
	nfs_release_request(req);
L
Linus Torvalds 已提交
169 170
}

A
Andy Adamson 已提交
171
int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
A
Andy Adamson 已提交
172
		      const struct rpc_call_ops *call_ops)
L
Linus Torvalds 已提交
173
{
A
Andy Adamson 已提交
174
	struct inode *inode = data->inode;
175
	int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
176
	struct rpc_task *task;
177 178 179
	struct rpc_message msg = {
		.rpc_argp = &data->args,
		.rpc_resp = &data->res,
A
Andy Adamson 已提交
180
		.rpc_cred = data->cred,
181
	};
182
	struct rpc_task_setup task_setup_data = {
183
		.task = &data->task,
A
Andy Adamson 已提交
184
		.rpc_client = clnt,
185
		.rpc_message = &msg,
186 187
		.callback_ops = call_ops,
		.callback_data = data,
188
		.workqueue = nfsiod_workqueue,
189 190
		.flags = RPC_TASK_ASYNC | swap_flags,
	};
L
Linus Torvalds 已提交
191

A
Andy Adamson 已提交
192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208
	/* Set up the initial task struct. */
	NFS_PROTO(inode)->read_setup(data, &msg);

	dprintk("NFS: %5u initiated read call (req %s/%lld, %u bytes @ "
			"offset %llu)\n",
			data->task.tk_pid,
			inode->i_sb->s_id,
			(long long)NFS_FILEID(inode),
			data->args.count,
			(unsigned long long)data->args.offset);

	task = rpc_run_task(&task_setup_data);
	if (IS_ERR(task))
		return PTR_ERR(task);
	rpc_put_task(task);
	return 0;
}
A
Andy Adamson 已提交
209
EXPORT_SYMBOL_GPL(nfs_initiate_read);
A
Andy Adamson 已提交
210 211 212 213

/*
 * Set up the NFS read request struct
 */
214 215
static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
		unsigned int count, unsigned int offset)
A
Andy Adamson 已提交
216 217 218
{
	struct inode *inode = req->wb_context->path.dentry->d_inode;

L
Linus Torvalds 已提交
219
	data->req	  = req;
220
	data->inode	  = inode;
A
Andy Adamson 已提交
221
	data->cred	  = req->wb_context->cred;
L
Linus Torvalds 已提交
222 223 224 225 226 227

	data->args.fh     = NFS_FH(inode);
	data->args.offset = req_offset(req) + offset;
	data->args.pgbase = req->wb_pgbase + offset;
	data->args.pages  = data->pagevec;
	data->args.count  = count;
228
	data->args.context = get_nfs_open_context(req->wb_context);
229
	data->args.lock_context = req->wb_lock_context;
L
Linus Torvalds 已提交
230 231 232 233

	data->res.fattr   = &data->fattr;
	data->res.count   = count;
	data->res.eof     = 0;
234
	nfs_fattr_init(&data->fattr);
235
}
L
Linus Torvalds 已提交
236

237 238 239 240 241 242 243 244 245 246 247 248 249
/*
 * Issue one prepared read.
 *
 * With a layout segment, a reference on it is stored in data->lseg and the
 * read is first offered to pNFS; PNFS_ATTEMPTED means pNFS took it and 0 is
 * returned.  Otherwise the segment reference is dropped again and the read
 * is sent through nfs_initiate_read() on the inode's NFS client.
 */
static int nfs_do_read(struct nfs_read_data *data,
		const struct rpc_call_ops *call_ops,
		struct pnfs_layout_segment *lseg)
{
	struct inode *inode = data->args.context->path.dentry->d_inode;

	if (lseg) {
		data->lseg = get_lseg(lseg);
		if (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED)
			return 0;
		put_lseg(data->lseg);
		data->lseg = NULL;
	}

	return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops);
}

254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274
/*
 * Drain @head, issuing every queued read descriptor through nfs_do_read().
 * All descriptors are attempted; the return value is the first non-zero
 * result seen, or 0 if every read was issued successfully.
 */
static int
nfs_do_multiple_reads(struct list_head *head,
		const struct rpc_call_ops *call_ops,
		struct pnfs_layout_segment *lseg)
{
	int first_err = 0;

	for (;;) {
		struct nfs_read_data *rdata;
		int err;

		if (list_empty(head))
			break;

		rdata = list_entry(head->next, struct nfs_read_data, list);
		list_del_init(&rdata->list);

		err = nfs_do_read(rdata, call_ops, lseg);
		if (first_err == 0)
			first_err = err;
	}
	return first_err;
}

L
Linus Torvalds 已提交
275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300
/*
 * Fail every request still queued on @head: take it off the list, flag its
 * page with an error and release the request.
 */
static void
nfs_async_read_error(struct list_head *head)
{
	while (!list_empty(head)) {
		struct nfs_page *failed = nfs_list_entry(head->next);

		nfs_list_remove_request(failed);
		SetPageError(failed->wb_page);
		nfs_readpage_release(failed);
	}
}

/*
 * Generate multiple requests to fill a single page.
 *
 * We optimize to reduce the number of read operations on the wire.  If we
 * detect that we're reading a page, or an area of a page, that is past the
 * end of file, we do not generate NFS read operations but just clear the
 * parts of the page that would have come back zero from the server anyway.
 *
 * We rely on the cached value of i_size to make this determination; another
 * client can fill pages on the server past our cached end-of-file, but we
 * won't see the new data until our attribute cache is updated.  This is more
 * or less conventional NFS client behavior.
 */
301
static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head *res)
L
Linus Torvalds 已提交
302
{
F
Fred Isaman 已提交
303
	struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
L
Linus Torvalds 已提交
304 305
	struct page *page = req->wb_page;
	struct nfs_read_data *data;
F
Fred Isaman 已提交
306
	size_t rsize = NFS_SERVER(desc->pg_inode)->rsize, nbytes;
307
	unsigned int offset;
L
Linus Torvalds 已提交
308
	int requests = 0;
309
	int ret = 0;
L
Linus Torvalds 已提交
310 311 312

	nfs_list_remove_request(req);

313
	offset = 0;
F
Fred Isaman 已提交
314
	nbytes = desc->pg_count;
315 316 317
	do {
		size_t len = min(nbytes,rsize);

318
		data = nfs_readdata_alloc(1);
L
Linus Torvalds 已提交
319 320
		if (!data)
			goto out_bad;
321 322 323
		data->pagevec[0] = page;
		nfs_read_rpcsetup(req, data, len, offset);
		list_add(&data->list, res);
L
Linus Torvalds 已提交
324
		requests++;
325
		nbytes -= len;
326
		offset += len;
327
	} while(nbytes != 0);
L
Linus Torvalds 已提交
328 329
	atomic_set(&req->wb_complete, requests);
	ClearPageError(page);
330
	return ret;
L
Linus Torvalds 已提交
331
out_bad:
332 333
	while (!list_empty(res)) {
		data = list_entry(res->next, struct nfs_read_data, list);
334
		list_del(&data->list);
L
Linus Torvalds 已提交
335 336 337 338 339 340 341
		nfs_readdata_free(data);
	}
	SetPageError(page);
	nfs_readpage_release(req);
	return -ENOMEM;
}

342
static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res)
L
Linus Torvalds 已提交
343 344 345 346
{
	struct nfs_page		*req;
	struct page		**pages;
	struct nfs_read_data	*data;
F
Fred Isaman 已提交
347
	struct list_head *head = &desc->pg_list;
348
	int ret = 0;
L
Linus Torvalds 已提交
349

F
Fred Isaman 已提交
350 351
	data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base,
						     desc->pg_count));
352 353
	if (!data) {
		nfs_async_read_error(head);
354
		ret = -ENOMEM;
355 356
		goto out;
	}
L
Linus Torvalds 已提交
357 358 359 360 361 362 363 364 365 366 367

	pages = data->pagevec;
	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_list_add_request(req, &data->pages);
		ClearPageError(req->wb_page);
		*pages++ = req->wb_page;
	}
	req = nfs_list_entry(data->pages.next);

368
	nfs_read_rpcsetup(req, data, desc->pg_count, 0);
369
	list_add(&data->list, res);
370
out:
371
	return ret;
L
Linus Torvalds 已提交
372 373
}

374 375
/*
 * Turn the requests accumulated in @desc into read RPCs.
 *
 * When the descriptor's block size is smaller than a page, the page is read
 * in several pieces (nfs_pagein_multi, partial-read callbacks); otherwise
 * one descriptor covers all requests (nfs_pagein_one, full-read callbacks).
 * The reads are only issued if the descriptors were built successfully.
 * The descriptor's layout segment reference is always dropped on the way
 * out.
 *
 * Returns 0 or the first error encountered.
 */
int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
{
	LIST_HEAD(head);
	const struct rpc_call_ops *call_ops;
	int ret;

	if (desc->pg_bsize < PAGE_CACHE_SIZE) {
		ret = nfs_pagein_multi(desc, &head);
		call_ops = &nfs_read_partial_ops;
	} else {
		ret = nfs_pagein_one(desc, &head);
		call_ops = &nfs_read_full_ops;
	}
	if (ret == 0)
		ret = nfs_do_multiple_reads(&head, call_ops, desc->pg_lseg);

	put_lseg(desc->pg_lseg);
	desc->pg_lseg = NULL;
	return ret;
}
EXPORT_SYMBOL_GPL(nfs_generic_pg_readpages);


/*
 * Pageio operations used for reads set up by nfs_pageio_init_read_mds():
 * the generic coalescing test and nfs_generic_pg_readpages() to do the I/O.
 */
static const struct nfs_pageio_ops nfs_pageio_read_ops = {
	.pg_test = nfs_generic_pg_test,
	.pg_doio = nfs_generic_pg_readpages,
};

404 405 406 407 408 409 410 411
/*
 * This is the callback from RPC telling us whether a reply was
 * received or some error occurred (timeout or socket shutdown).
 */
int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
{
	int status;

412
	dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid,
413 414 415 416 417 418 419 420 421
			task->tk_status);

	status = NFS_PROTO(data->inode)->read_done(task, data);
	if (status != 0)
		return status;

	nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count);

	if (task->tk_status == -ESTALE) {
B
Benny Halevy 已提交
422
		set_bit(NFS_INO_STALE, &NFS_I(data->inode)->flags);
423 424 425 426 427
		nfs_mark_for_revalidate(data->inode);
	}
	return 0;
}

428
static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
429 430 431 432 433
{
	struct nfs_readargs *argp = &data->args;
	struct nfs_readres *resp = &data->res;

	if (resp->eof || resp->count == argp->count)
434
		return;
435 436 437 438 439

	/* This is a short read! */
	nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
	/* Has the server at least made some progress? */
	if (resp->count == 0)
440
		return;
441 442

	/* Yes, so retry the read at the end of the data */
443
	data->mds_offset += resp->count;
444 445 446
	argp->offset += resp->count;
	argp->pgbase += resp->count;
	argp->count -= resp->count;
447
	nfs_restart_rpc(task, NFS_SERVER(data->inode)->nfs_client);
448 449
}

L
Linus Torvalds 已提交
450 451 452
/*
 * Handle a read reply that fills part of a page.
 */
T
Trond Myklebust 已提交
453
static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
L
Linus Torvalds 已提交
454
{
T
Trond Myklebust 已提交
455
	struct nfs_read_data *data = calldata;
L
Linus Torvalds 已提交
456
 
T
Trond Myklebust 已提交
457 458
	if (nfs_readpage_result(task, data) != 0)
		return;
459 460
	if (task->tk_status < 0)
		return;
461

462 463 464 465 466 467 468 469 470 471 472 473
	nfs_readpage_truncate_uninitialised_page(data);
	nfs_readpage_retry(task, data);
}

/*
 * Release callback for one piece of a page read in several parts.
 *
 * A failed piece flags the page with an error.  req->wb_complete counts the
 * outstanding pieces; when the last one finishes the page is marked up to
 * date unless an error was flagged, and the request is released.  The read
 * descriptor is released in every case.
 */
static void nfs_readpage_release_partial(void *calldata)
{
	struct nfs_read_data *data = calldata;
	struct nfs_page *req = data->req;
	struct page *page = req->wb_page;
	int status = data->task.tk_status;

	if (status < 0)
		SetPageError(page);

	if (atomic_dec_and_test(&req->wb_complete)) {
		if (!PageError(page))
			SetPageUptodate(page);
		nfs_readpage_release(req);
	}
	nfs_readdata_release(calldata);
}

484 485 486 487 488
#if defined(CONFIG_NFS_V4_1)
/*
 * rpc_call_prepare callback for reads (NFSv4.1 builds only).
 *
 * Sets up the session sequence arguments for the read; the call is started
 * only when nfs4_setup_sequence() returns 0.
 */
void nfs_read_prepare(struct rpc_task *task, void *calldata)
{
	struct nfs_read_data *data = calldata;

	if (nfs4_setup_sequence(NFS_SERVER(data->inode),
				&data->args.seq_args, &data->res.seq_res,
				0, task))
		return;
	rpc_call_start(task);
}
#endif /* CONFIG_NFS_V4_1 */

T
Trond Myklebust 已提交
497
static const struct rpc_call_ops nfs_read_partial_ops = {
498 499 500
#if defined(CONFIG_NFS_V4_1)
	.rpc_call_prepare = nfs_read_prepare,
#endif /* CONFIG_NFS_V4_1 */
T
Trond Myklebust 已提交
501
	.rpc_call_done = nfs_readpage_result_partial,
502
	.rpc_release = nfs_readpage_release_partial,
T
Trond Myklebust 已提交
503 504
};

505 506 507 508 509 510
/*
 * Mark as up to date the pages that a (possibly multi-page) read filled.
 *
 * The byte count considered is res.count, or the full args.count when the
 * server reported EOF.  Every page completely covered by that count is
 * marked; a trailing partially-covered page is marked only if the read was
 * not short (EOF, or everything requested was returned).
 */
static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
{
	unsigned int count = data->res.count;
	unsigned int base = data->args.pgbase;
	struct page **pages;

	if (data->res.eof)
		count = data->args.count;
	if (unlikely(count == 0))
		return;
	pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
	base &= ~PAGE_CACHE_MASK;
	count += base;
	while (count >= PAGE_CACHE_SIZE) {
		SetPageUptodate(*pages);
		count -= PAGE_CACHE_SIZE;
		pages++;
	}
	if (count == 0)
		return;
	/* Was this a short read? */
	if (data->res.eof || data->res.count == data->args.count)
		SetPageUptodate(*pages);
}

L
Linus Torvalds 已提交
527 528 529 530
/*
 * This is the callback from RPC telling us whether a reply was
 * received or some error occurred (timeout or socket shutdown).
 */
T
Trond Myklebust 已提交
531
static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
L
Linus Torvalds 已提交
532
{
T
Trond Myklebust 已提交
533
	struct nfs_read_data *data = calldata;
L
Linus Torvalds 已提交
534

535 536
	if (nfs_readpage_result(task, data) != 0)
		return;
537 538
	if (task->tk_status < 0)
		return;
539
	/*
540
	 * Note: nfs_readpage_retry may change the values of
541
	 * data->args. In the multi-page case, we therefore need
542 543
	 * to ensure that we call nfs_readpage_set_pages_uptodate()
	 * first.
544
	 */
545 546 547 548 549 550 551 552 553
	nfs_readpage_truncate_uninitialised_page(data);
	nfs_readpage_set_pages_uptodate(data);
	nfs_readpage_retry(task, data);
}

static void nfs_readpage_release_full(void *calldata)
{
	struct nfs_read_data *data = calldata;

L
Linus Torvalds 已提交
554 555 556
	while (!list_empty(&data->pages)) {
		struct nfs_page *req = nfs_list_entry(data->pages.next);

557
		nfs_list_remove_request(req);
L
Linus Torvalds 已提交
558 559
		nfs_readpage_release(req);
	}
560
	nfs_readdata_release(calldata);
L
Linus Torvalds 已提交
561 562
}

T
Trond Myklebust 已提交
563
static const struct rpc_call_ops nfs_read_full_ops = {
564 565 566
#if defined(CONFIG_NFS_V4_1)
	.rpc_call_prepare = nfs_read_prepare,
#endif /* CONFIG_NFS_V4_1 */
T
Trond Myklebust 已提交
567
	.rpc_call_done = nfs_readpage_result_full,
568
	.rpc_release = nfs_readpage_release_full,
T
Trond Myklebust 已提交
569 570
};

L
Linus Torvalds 已提交
571 572 573 574 575 576 577 578 579 580 581 582 583 584
/*
 * Read a page over NFS.
 * We read the page synchronously in the following case:
 *  -	The error flag is set for this page. This happens only when a
 *	previous async read operation failed.
 */
int nfs_readpage(struct file *file, struct page *page)
{
	struct nfs_open_context *ctx;
	struct inode *inode = page->mapping->host;
	int		error;

	dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
		page, PAGE_CACHE_SIZE, page->index);
C
Chuck Lever 已提交
585 586 587
	nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
	nfs_add_stats(inode, NFSIOS_READPAGES, 1);

L
Linus Torvalds 已提交
588 589 590 591 592 593 594 595 596
	/*
	 * Try to flush any pending writes to the file..
	 *
	 * NOTE! Because we own the page lock, there cannot
	 * be any new pending writes generated at this point
	 * for this page (other pages can be written to).
	 */
	error = nfs_wb_page(inode, page);
	if (error)
597 598 599
		goto out_unlock;
	if (PageUptodate(page))
		goto out_unlock;
L
Linus Torvalds 已提交
600

601 602
	error = -ESTALE;
	if (NFS_STALE(inode))
603
		goto out_unlock;
604

L
Linus Torvalds 已提交
605
	if (file == NULL) {
606
		error = -EBADF;
607
		ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
L
Linus Torvalds 已提交
608
		if (ctx == NULL)
609
			goto out_unlock;
L
Linus Torvalds 已提交
610
	} else
611
		ctx = get_nfs_open_context(nfs_file_open_context(file));
L
Linus Torvalds 已提交
612

613 614 615 616 617 618
	if (!IS_SYNC(inode)) {
		error = nfs_readpage_from_fscache(ctx, inode, page);
		if (error == 0)
			goto out;
	}

619 620
	error = nfs_readpage_async(ctx, inode, page);

621
out:
L
Linus Torvalds 已提交
622 623
	put_nfs_open_context(ctx);
	return error;
624
out_unlock:
L
Linus Torvalds 已提交
625 626 627 628 629
	unlock_page(page);
	return error;
}

/* State handed to readpage_async_filler() for each page of a readpages call. */
struct nfs_readdesc {
	struct nfs_pageio_descriptor *pgio;	/* descriptor requests are added to */
	struct nfs_open_context *ctx;		/* open context used to create requests */
};

static int
readpage_async_filler(void *data, struct page *page)
{
	struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
	struct inode *inode = page->mapping->host;
	struct nfs_page *new;
	unsigned int len;
641 642
	int error;

643
	len = nfs_page_length(page);
L
Linus Torvalds 已提交
644 645
	if (len == 0)
		return nfs_return_empty_page(page);
646

L
Linus Torvalds 已提交
647
	new = nfs_create_request(desc->ctx, inode, page, 0, len);
648 649 650
	if (IS_ERR(new))
		goto out_error;

L
Linus Torvalds 已提交
651
	if (len < PAGE_CACHE_SIZE)
652
		zero_user_segment(page, len, PAGE_CACHE_SIZE);
653 654 655 656
	if (!nfs_pageio_add_request(desc->pgio, new)) {
		error = desc->pgio->pg_error;
		goto out_unlock;
	}
L
Linus Torvalds 已提交
657
	return 0;
658 659 660 661 662 663
out_error:
	error = PTR_ERR(new);
	SetPageError(page);
out_unlock:
	unlock_page(page);
	return error;
L
Linus Torvalds 已提交
664 665 666 667 668
}

int nfs_readpages(struct file *filp, struct address_space *mapping,
		struct list_head *pages, unsigned nr_pages)
{
669
	struct nfs_pageio_descriptor pgio;
L
Linus Torvalds 已提交
670
	struct nfs_readdesc desc = {
671
		.pgio = &pgio,
L
Linus Torvalds 已提交
672 673
	};
	struct inode *inode = mapping->host;
674
	unsigned long npages;
675
	int ret = -ESTALE;
L
Linus Torvalds 已提交
676 677 678 679 680

	dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
			inode->i_sb->s_id,
			(long long)NFS_FILEID(inode),
			nr_pages);
C
Chuck Lever 已提交
681
	nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
L
Linus Torvalds 已提交
682

683 684 685
	if (NFS_STALE(inode))
		goto out;

L
Linus Torvalds 已提交
686
	if (filp == NULL) {
687
		desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
L
Linus Torvalds 已提交
688 689 690
		if (desc.ctx == NULL)
			return -EBADF;
	} else
691
		desc.ctx = get_nfs_open_context(nfs_file_open_context(filp));
692 693 694 695 696 697 698 699 700

	/* attempt to read as many of the pages as possible from the cache
	 * - this returns -ENOBUFS immediately if the cookie is negative
	 */
	ret = nfs_readpages_from_fscache(desc.ctx, inode, mapping,
					 pages, &nr_pages);
	if (ret == 0)
		goto read_complete; /* all pages were read */

701
	nfs_pageio_init_read(&pgio, inode);
702

L
Linus Torvalds 已提交
703
	ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
704 705 706 707

	nfs_pageio_complete(&pgio);
	npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	nfs_add_stats(inode, NFSIOS_READPAGES, npages);
708
read_complete:
L
Linus Torvalds 已提交
709
	put_nfs_open_context(desc.ctx);
710
out:
L
Linus Torvalds 已提交
711 712 713
	return ret;
}

D
David Howells 已提交
714
/*
 * Create the slab cache and the mempool (MIN_POOL_READ reserved entries)
 * that back nfs_readdata_alloc().
 *
 * Returns 0 on success or -ENOMEM.  On failure nothing is left allocated:
 * if the mempool cannot be created the slab cache is destroyed again, since
 * nothing else in this file would release it on that path.
 */
int __init nfs_init_readpagecache(void)
{
	nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
					     sizeof(struct nfs_read_data),
					     0, SLAB_HWCACHE_ALIGN,
					     NULL);
	if (nfs_rdata_cachep == NULL)
		return -ENOMEM;

	nfs_rdata_mempool = mempool_create_slab_pool(MIN_POOL_READ,
						     nfs_rdata_cachep);
	if (nfs_rdata_mempool == NULL) {
		kmem_cache_destroy(nfs_rdata_cachep);
		nfs_rdata_cachep = NULL;
		return -ENOMEM;
	}

	return 0;
}

731
void nfs_destroy_readpagecache(void)
L
Linus Torvalds 已提交
732 733
{
	mempool_destroy(nfs_rdata_mempool);
734
	kmem_cache_destroy(nfs_rdata_cachep);
L
Linus Torvalds 已提交
735
}