write.c 57.5 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0-only
L
Linus Torvalds 已提交
2 3 4
/*
 * linux/fs/nfs/write.c
 *
5
 * Write file data over NFS.
L
Linus Torvalds 已提交
6 7 8 9 10 11 12 13 14 15
 *
 * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
 */

#include <linux/types.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/writeback.h>
16
#include <linux/swap.h>
17
#include <linux/migrate.h>
L
Linus Torvalds 已提交
18 19 20 21 22

#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/nfs_page.h>
23
#include <linux/backing-dev.h>
24
#include <linux/export.h>
25 26
#include <linux/freezer.h>
#include <linux/wait.h>
27
#include <linux/iversion.h>
28

29
#include <linux/uaccess.h>
30
#include <linux/sched/mm.h>
L
Linus Torvalds 已提交
31 32

#include "delegation.h"
33
#include "internal.h"
34
#include "iostat.h"
35
#include "nfs4_fs.h"
36
#include "fscache.h"
37
#include "pnfs.h"
L
Linus Torvalds 已提交
38

39 40
#include "nfstrace.h"

L
Linus Torvalds 已提交
41 42 43 44 45
#define NFSDBG_FACILITY		NFSDBG_PAGECACHE

#define MIN_POOL_WRITE		(32)
#define MIN_POOL_COMMIT		(4)

46 47 48 49 50 51
struct nfs_io_completion {
	void (*complete)(void *data);
	void *data;
	struct kref refcount;
};

L
Linus Torvalds 已提交
52 53 54
/*
 * Local function declarations
 */
55
static void nfs_redirty_request(struct nfs_page *req);
56
static const struct rpc_call_ops nfs_commit_ops;
57
static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
58
static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
59
static const struct nfs_rw_ops nfs_rw_write_ops;
60
static void nfs_inode_remove_request(struct nfs_page *req);
61
static void nfs_clear_request_commit(struct nfs_page *req);
62 63
static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
				      struct inode *inode);
64 65 66
static struct nfs_page *
nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi,
						struct page *page);
L
Linus Torvalds 已提交
67

68
static struct kmem_cache *nfs_wdata_cachep;
69
static mempool_t *nfs_wdata_mempool;
70
static struct kmem_cache *nfs_cdata_cachep;
L
Linus Torvalds 已提交
71 72
static mempool_t *nfs_commit_mempool;

N
NeilBrown 已提交
73
struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail)
L
Linus Torvalds 已提交
74
{
N
NeilBrown 已提交
75
	struct nfs_commit_data *p;
76

N
NeilBrown 已提交
77 78 79 80 81 82 83 84 85 86 87 88 89 90
	if (never_fail)
		p = mempool_alloc(nfs_commit_mempool, GFP_NOIO);
	else {
		/* It is OK to do some reclaim, not no safe to wait
		 * for anything to be returned to the pool.
		 * mempool_alloc() cannot handle that particular combination,
		 * so we need two separate attempts.
		 */
		p = mempool_alloc(nfs_commit_mempool, GFP_NOWAIT);
		if (!p)
			p = kmem_cache_alloc(nfs_cdata_cachep, GFP_NOIO |
					     __GFP_NOWARN | __GFP_NORETRY);
		if (!p)
			return NULL;
L
Linus Torvalds 已提交
91
	}
N
NeilBrown 已提交
92 93 94

	memset(p, 0, sizeof(*p));
	INIT_LIST_HEAD(&p->pages);
L
Linus Torvalds 已提交
95 96
	return p;
}
97
EXPORT_SYMBOL_GPL(nfs_commitdata_alloc);
L
Linus Torvalds 已提交
98

99
void nfs_commit_free(struct nfs_commit_data *p)
L
Linus Torvalds 已提交
100 101 102
{
	mempool_free(p, nfs_commit_mempool);
}
103
EXPORT_SYMBOL_GPL(nfs_commit_free);
L
Linus Torvalds 已提交
104

105
static struct nfs_pgio_header *nfs_writehdr_alloc(void)
106
{
107
	struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_KERNEL);
108

109 110
	memset(p, 0, sizeof(*p));
	p->rw_mode = FMODE_WRITE;
111 112
	return p;
}
113

114
static void nfs_writehdr_free(struct nfs_pgio_header *hdr)
115
{
116
	mempool_free(hdr, nfs_wdata_mempool);
117
}
L
Linus Torvalds 已提交
118

119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151
static struct nfs_io_completion *nfs_io_completion_alloc(gfp_t gfp_flags)
{
	return kmalloc(sizeof(struct nfs_io_completion), gfp_flags);
}

static void nfs_io_completion_init(struct nfs_io_completion *ioc,
		void (*complete)(void *), void *data)
{
	ioc->complete = complete;
	ioc->data = data;
	kref_init(&ioc->refcount);
}

static void nfs_io_completion_release(struct kref *kref)
{
	struct nfs_io_completion *ioc = container_of(kref,
			struct nfs_io_completion, refcount);
	ioc->complete(ioc->data);
	kfree(ioc);
}

static void nfs_io_completion_get(struct nfs_io_completion *ioc)
{
	if (ioc != NULL)
		kref_get(&ioc->refcount);
}

static void nfs_io_completion_put(struct nfs_io_completion *ioc)
{
	if (ioc != NULL)
		kref_put(&ioc->refcount, nfs_io_completion_release);
}

152 153 154 155 156 157 158 159
static struct nfs_page *
nfs_page_private_request(struct page *page)
{
	if (!PagePrivate(page))
		return NULL;
	return (struct nfs_page *)page_private(page);
}

160 161 162 163 164 165 166
/*
 * nfs_page_find_head_request_locked - find head request associated with @page
 *
 * must be called while holding the inode lock.
 *
 * returns matching head request with reference held, or NULL if not found.
 */
167
static struct nfs_page *
168
nfs_page_find_private_request(struct page *page)
169
{
170
	struct address_space *mapping = page_file_mapping(page);
171
	struct nfs_page *req;
172

173 174
	if (!PagePrivate(page))
		return NULL;
175
	spin_lock(&mapping->private_lock);
176
	req = nfs_page_private_request(page);
177 178
	if (req) {
		WARN_ON_ONCE(req->wb_head != req);
179
		kref_get(&req->wb_kref);
180
	}
181
	spin_unlock(&mapping->private_lock);
182 183
	return req;
}
184

185 186 187 188 189 190 191 192
static struct nfs_page *
nfs_page_find_swap_request(struct page *page)
{
	struct inode *inode = page_file_mapping(page)->host;
	struct nfs_inode *nfsi = NFS_I(inode);
	struct nfs_page *req = NULL;
	if (!PageSwapCache(page))
		return NULL;
193
	mutex_lock(&nfsi->commit_mutex);
194 195 196 197 198 199 200 201
	if (PageSwapCache(page)) {
		req = nfs_page_search_commits_for_head_request_locked(nfsi,
			page);
		if (req) {
			WARN_ON_ONCE(req->wb_head != req);
			kref_get(&req->wb_kref);
		}
	}
202
	mutex_unlock(&nfsi->commit_mutex);
203 204 205
	return req;
}

206 207 208 209 210 211
/*
 * nfs_page_find_head_request - find head request associated with @page
 *
 * returns matching head request with reference held, or NULL if not found.
 */
static struct nfs_page *nfs_page_find_head_request(struct page *page)
212
{
213
	struct nfs_page *req;
214

215 216 217
	req = nfs_page_find_private_request(page);
	if (!req)
		req = nfs_page_find_swap_request(page);
218 219 220
	return req;
}

L
Linus Torvalds 已提交
221 222 223
/* Adjust the file length if we're writing beyond the end */
static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
{
224
	struct inode *inode = page_file_mapping(page)->host;
225 226
	loff_t end, i_size;
	pgoff_t end_index;
L
Linus Torvalds 已提交
227

228 229
	spin_lock(&inode->i_lock);
	i_size = i_size_read(inode);
230
	end_index = (i_size - 1) >> PAGE_SHIFT;
H
Huang Ying 已提交
231
	if (i_size > 0 && page_index(page) < end_index)
232
		goto out;
233
	end = page_file_offset(page) + ((loff_t)offset+count);
L
Linus Torvalds 已提交
234
	if (i_size >= end)
235
		goto out;
L
Linus Torvalds 已提交
236
	i_size_write(inode, end);
237
	NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE;
238 239 240
	nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
out:
	spin_unlock(&inode->i_lock);
L
Linus Torvalds 已提交
241 242
}

243
/* A writeback failed: mark the page as bad, and invalidate the page cache */
244
static void nfs_set_pageerror(struct address_space *mapping)
245
{
246
	nfs_zap_mapping(mapping->host, mapping);
247 248
}

249 250 251 252 253 254
static void nfs_mapping_set_error(struct page *page, int error)
{
	SetPageError(page);
	mapping_set_error(page_file_mapping(page), error);
}

255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297
/*
 * nfs_page_group_search_locked
 * @head - head request of page group
 * @page_offset - offset into page
 *
 * Search page group with head @head to find a request that contains the
 * page offset @page_offset.
 *
 * Returns a pointer to the first matching nfs request, or NULL if no
 * match is found.
 *
 * Must be called with the page group lock held
 */
static struct nfs_page *
nfs_page_group_search_locked(struct nfs_page *head, unsigned int page_offset)
{
	struct nfs_page *req;

	req = head;
	do {
		if (page_offset >= req->wb_pgbase &&
		    page_offset < (req->wb_pgbase + req->wb_bytes))
			return req;

		req = req->wb_this_page;
	} while (req != head);

	return NULL;
}

/*
 * nfs_page_group_covers_page
 * @head - head request of page group
 *
 * Return true if the page group with head @head covers the whole page,
 * returns false otherwise
 */
static bool nfs_page_group_covers_page(struct nfs_page *req)
{
	struct nfs_page *tmp;
	unsigned int pos = 0;
	unsigned int len = nfs_page_length(req->wb_page);

298
	nfs_page_group_lock(req);
299

300
	for (;;) {
301
		tmp = nfs_page_group_search_locked(req->wb_head, pos);
302 303 304 305
		if (!tmp)
			break;
		pos = tmp->wb_pgbase + tmp->wb_bytes;
	}
306 307

	nfs_page_group_unlock(req);
308
	return pos >= len;
309 310
}

L
Linus Torvalds 已提交
311 312 313
/* We can set the PG_uptodate flag if we see that a write request
 * covers the full page.
 */
314
static void nfs_mark_uptodate(struct nfs_page *req)
L
Linus Torvalds 已提交
315
{
316
	if (PageUptodate(req->wb_page))
L
Linus Torvalds 已提交
317
		return;
318
	if (!nfs_page_group_covers_page(req))
L
Linus Torvalds 已提交
319
		return;
320
	SetPageUptodate(req->wb_page);
L
Linus Torvalds 已提交
321 322 323 324
}

static int wb_priority(struct writeback_control *wbc)
{
325
	int ret = 0;
326

327 328 329
	if (wbc->sync_mode == WB_SYNC_ALL)
		ret = FLUSH_COND_STABLE;
	return ret;
L
Linus Torvalds 已提交
330 331
}

332 333 334 335 336 337 338 339 340 341
/*
 * NFS congestion control
 */

int nfs_congestion_kb;

#define NFS_CONGESTION_ON_THRESH 	(nfs_congestion_kb >> (PAGE_SHIFT-10))
#define NFS_CONGESTION_OFF_THRESH	\
	(NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2))

342
static void nfs_set_page_writeback(struct page *page)
343
{
344 345
	struct inode *inode = page_file_mapping(page)->host;
	struct nfs_server *nfss = NFS_SERVER(inode);
346 347
	int ret = test_set_page_writeback(page);

348
	WARN_ON_ONCE(ret != 0);
349

350
	if (atomic_long_inc_return(&nfss->writeback) >
351 352
			NFS_CONGESTION_ON_THRESH)
		set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
353 354
}

355
static void nfs_end_page_writeback(struct nfs_page *req)
356
{
357
	struct inode *inode = page_file_mapping(req->wb_page)->host;
358
	struct nfs_server *nfss = NFS_SERVER(inode);
359
	bool is_done;
360

361 362 363
	is_done = nfs_page_group_sync_on_bit(req, PG_WB_END);
	nfs_unlock_request(req);
	if (!is_done)
364 365 366
		return;

	end_page_writeback(req->wb_page);
367
	if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
368
		clear_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
369 370
}

371 372 373 374 375 376 377 378 379
/*
 * nfs_unroll_locks_and_wait -  unlock all newly locked reqs and wait on @req
 *
 * this is a helper function for nfs_lock_and_join_requests
 *
 * @inode - inode associated with request page group, must be holding inode lock
 * @head  - head request of page group, must be holding head lock
 * @req   - request that couldn't lock and needs to wait on the req bit lock
 *
380 381
 * NOTE: this must be called holding page_group bit lock
 *       which will be released before returning.
382 383 384
 *
 * returns 0 on success, < 0 on error.
 */
385 386
static void
nfs_unroll_locks(struct inode *inode, struct nfs_page *head,
387
			  struct nfs_page *req)
388 389 390 391
{
	struct nfs_page *tmp;

	/* relinquish all the locks successfully grabbed this run */
392 393 394 395 396
	for (tmp = head->wb_this_page ; tmp != req; tmp = tmp->wb_this_page) {
		if (!kref_read(&tmp->wb_kref))
			continue;
		nfs_unlock_and_release_request(tmp);
	}
397 398 399 400 401 402 403 404 405 406 407 408 409 410
}

/*
 * nfs_destroy_unlinked_subrequests - destroy recently unlinked subrequests
 *
 * @destroy_list - request list (using wb_this_page) terminated by @old_head
 * @old_head - the old head of the list
 *
 * All subrequests must be locked and removed from all lists, so at this point
 * they are only "active" in this function, and possibly in nfs_wait_on_request
 * with a reference held by some other context.
 */
static void
nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
411 412
				 struct nfs_page *old_head,
				 struct inode *inode)
413 414 415 416 417 418 419 420 421 422 423 424
{
	while (destroy_list) {
		struct nfs_page *subreq = destroy_list;

		destroy_list = (subreq->wb_this_page == old_head) ?
				   NULL : subreq->wb_this_page;

		WARN_ON_ONCE(old_head != subreq->wb_head);

		/* make sure old group is not used */
		subreq->wb_this_page = subreq;

425 426
		clear_bit(PG_REMOVE, &subreq->wb_flags);

427 428 429
		/* Note: races with nfs_page_group_destroy() */
		if (!kref_read(&subreq->wb_kref)) {
			/* Check if we raced with nfs_page_group_destroy() */
430
			if (test_and_clear_bit(PG_TEARDOWN, &subreq->wb_flags))
431 432 433
				nfs_free_request(subreq);
			continue;
		}
434

435 436 437 438
		subreq->wb_head = subreq;

		if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags)) {
			nfs_release_request(subreq);
439
			atomic_long_dec(&NFS_I(inode)->nrequests);
440
		}
441 442 443 444

		/* subreq is now totally disconnected from page group or any
		 * write / commit lists. last chance to wake any waiters */
		nfs_unlock_and_release_request(subreq);
445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466
	}
}

/*
 * nfs_lock_and_join_requests - join all subreqs to the head req and return
 *                              a locked reference, cancelling any pending
 *                              operations for this page.
 *
 * @page - the page used to lookup the "page group" of nfs_page structures
 *
 * This function joins all sub requests to the head request by first
 * locking all requests in the group, cancelling any pending operations
 * and finally updating the head request to cover the whole range covered by
 * the (former) group.  All subrequests are removed from any write or commit
 * lists, unlinked from the group and destroyed.
 *
 * Returns a locked, referenced pointer to the head request - which after
 * this call is guaranteed to be the only request associated with the page.
 * Returns NULL if no requests are found for @page, or a ERR_PTR if an
 * error was encountered.
 */
static struct nfs_page *
467
nfs_lock_and_join_requests(struct page *page)
468
{
469
	struct inode *inode = page_file_mapping(page)->host;
470 471 472
	struct nfs_page *head, *subreq;
	struct nfs_page *destroy_list = NULL;
	unsigned int total_bytes;
473 474
	int ret;

475 476 477 478 479 480
try_again:
	/*
	 * A reference is taken only on the head request which acts as a
	 * reference to the whole page group - the group will not be destroyed
	 * until the head reference is released.
	 */
481 482
	head = nfs_page_find_head_request(page);
	if (!head)
483 484
		return NULL;

485 486 487 488 489 490 491 492
	/* lock the page head first in order to avoid an ABBA inefficiency */
	if (!nfs_lock_request(head)) {
		ret = nfs_wait_on_request(head);
		nfs_release_request(head);
		if (ret < 0)
			return ERR_PTR(ret);
		goto try_again;
	}
493 494 495 496 497 498

	/* Ensure that nobody removed the request before we locked it */
	if (head != nfs_page_private_request(page) && !PageSwapCache(page)) {
		nfs_unlock_and_release_request(head);
		goto try_again;
	}
499

500
	ret = nfs_page_group_lock(head);
501 502
	if (ret < 0)
		goto release_request;
503 504

	/* lock each request in the page group */
505 506 507
	total_bytes = head->wb_bytes;
	for (subreq = head->wb_this_page; subreq != head;
			subreq = subreq->wb_this_page) {
508

509 510 511
		if (!kref_get_unless_zero(&subreq->wb_kref)) {
			if (subreq->wb_offset == head->wb_offset + total_bytes)
				total_bytes += subreq->wb_bytes;
512
			continue;
513 514
		}

515
		while (!nfs_lock_request(subreq)) {
516
			/*
517 518
			 * Unlock page to allow nfs_page_group_sync_on_bit()
			 * to succeed
519
			 */
520 521 522
			nfs_page_group_unlock(head);
			ret = nfs_wait_on_request(subreq);
			if (!ret)
523
				ret = nfs_page_group_lock(head);
524 525
			if (ret < 0) {
				nfs_unroll_locks(inode, head, subreq);
526
				nfs_release_request(subreq);
527
				goto release_request;
528
			}
529
		}
530 531
		/*
		 * Subrequests are always contiguous, non overlapping
532
		 * and in order - but may be repeated (mirrored writes).
533
		 */
534 535 536 537 538 539
		if (subreq->wb_offset == (head->wb_offset + total_bytes)) {
			/* keep track of how many bytes this group covers */
			total_bytes += subreq->wb_bytes;
		} else if (WARN_ON_ONCE(subreq->wb_offset < head->wb_offset ||
			    ((subreq->wb_offset + subreq->wb_bytes) >
			     (head->wb_offset + total_bytes)))) {
540
			nfs_page_group_unlock(head);
541
			nfs_unroll_locks(inode, head, subreq);
542
			nfs_unlock_and_release_request(subreq);
543 544
			ret = -EIO;
			goto release_request;
545
		}
546
	}
547 548 549 550 551

	/* Now that all requests are locked, make sure they aren't on any list.
	 * Commit list removal accounting is done after locks are dropped */
	subreq = head;
	do {
552
		nfs_clear_request_commit(subreq);
553 554 555 556 557 558 559 560 561 562 563 564
		subreq = subreq->wb_this_page;
	} while (subreq != head);

	/* unlink subrequests from head, destroy them later */
	if (head->wb_this_page != head) {
		/* destroy list will be terminated by head */
		destroy_list = head->wb_this_page;
		head->wb_this_page = head;

		/* change head request to cover whole range that
		 * the former page group covered */
		head->wb_bytes = total_bytes;
565
	}
566

567 568 569 570
	/* Postpone destruction of this request */
	if (test_and_clear_bit(PG_REMOVE, &head->wb_flags)) {
		set_bit(PG_INODE_REF, &head->wb_flags);
		kref_get(&head->wb_kref);
571
		atomic_long_inc(&NFS_I(inode)->nrequests);
572 573
	}

574 575
	nfs_page_group_unlock(head);

576
	nfs_destroy_unlinked_subrequests(destroy_list, head, inode);
577

578 579 580 581 582 583
	/* Did we lose a race with nfs_inode_remove_request()? */
	if (!(PagePrivate(page) || PageSwapCache(page))) {
		nfs_unlock_and_release_request(head);
		return NULL;
	}

584
	/* still holds ref on head from nfs_page_find_head_request
585 586
	 * and still has lock on head from lock loop */
	return head;
587 588 589 590

release_request:
	nfs_unlock_and_release_request(head);
	return ERR_PTR(ret);
591 592
}

593
static void nfs_write_error(struct nfs_page *req, int error)
594
{
595
	nfs_set_pageerror(page_file_mapping(req->wb_page));
596
	trace_nfs_write_error(req, error);
597
	nfs_mapping_set_error(req->wb_page, error);
598
	nfs_inode_remove_request(req);
599
	nfs_end_page_writeback(req);
600
	nfs_release_request(req);
601 602
}

603 604 605 606 607
/*
 * Find an associated nfs write request, and prepare to flush it out
 * May return an error if the user signalled nfs_wait_on_request().
 */
static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
608
				struct page *page)
609 610 611 612
{
	struct nfs_page *req;
	int ret = 0;

613
	req = nfs_lock_and_join_requests(page);
614 615 616 617 618 619
	if (!req)
		goto out;
	ret = PTR_ERR(req);
	if (IS_ERR(req))
		goto out;

620 621
	nfs_set_page_writeback(page);
	WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags));
622

623
	/* If there is a fatal error that covers this write, just exit */
624 625
	ret = pgio->pg_error;
	if (nfs_error_is_fatal_on_server(ret))
626 627
		goto out_launder;

628
	ret = 0;
629
	if (!nfs_pageio_add_request(pgio, req)) {
630
		ret = pgio->pg_error;
631
		/*
632
		 * Remove the problematic req upon fatal errors on the server
633 634
		 */
		if (nfs_error_is_fatal(ret)) {
635
			if (nfs_error_is_fatal_on_server(ret))
636
				goto out_launder;
637 638
		} else
			ret = -EAGAIN;
639
		nfs_redirty_request(req);
640
		pgio->pg_error = 0;
641 642 643
	} else
		nfs_add_stats(page_file_mapping(page)->host,
				NFSIOS_WRITEPAGES, 1);
644 645
out:
	return ret;
646
out_launder:
647
	nfs_write_error(req, ret);
648
	return 0;
649 650
}

651
static int nfs_do_writepage(struct page *page, struct writeback_control *wbc,
652
			    struct nfs_pageio_descriptor *pgio)
L
Linus Torvalds 已提交
653
{
654
	int ret;
L
Linus Torvalds 已提交
655

H
Huang Ying 已提交
656
	nfs_pageio_cond_complete(pgio, page_index(page));
657
	ret = nfs_page_async_flush(pgio, page);
658 659
	if (ret == -EAGAIN) {
		redirty_page_for_writepage(wbc, page);
660
		ret = AOP_WRITEPAGE_ACTIVATE;
661 662
	}
	return ret;
663
}
664

665 666 667
/*
 * Write an mmapped page to the server.
 */
668
static int nfs_writepage_locked(struct page *page,
669
				struct writeback_control *wbc)
670 671
{
	struct nfs_pageio_descriptor pgio;
672
	struct inode *inode = page_file_mapping(page)->host;
673
	int err;
674

675
	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
676
	nfs_pageio_init_write(&pgio, inode, 0,
677
				false, &nfs_async_write_completion_ops);
678
	err = nfs_do_writepage(page, wbc, &pgio);
679
	pgio.pg_error = 0;
680 681 682
	nfs_pageio_complete(&pgio);
	if (err < 0)
		return err;
683
	if (nfs_error_is_fatal(pgio.pg_error))
684 685
		return pgio.pg_error;
	return 0;
686 687 688 689
}

int nfs_writepage(struct page *page, struct writeback_control *wbc)
{
690
	int ret;
691

692
	ret = nfs_writepage_locked(page, wbc);
693 694
	if (ret != AOP_WRITEPAGE_ACTIVATE)
		unlock_page(page);
695 696 697 698 699 700 701
	return ret;
}

static int nfs_writepages_callback(struct page *page, struct writeback_control *wbc, void *data)
{
	int ret;

702
	ret = nfs_do_writepage(page, wbc, data);
703 704
	if (ret != AOP_WRITEPAGE_ACTIVATE)
		unlock_page(page);
705
	return ret;
L
Linus Torvalds 已提交
706 707
}

708 709 710 711 712
static void nfs_io_completion_commit(void *inode)
{
	nfs_commit_inode(inode, 0);
}

L
Linus Torvalds 已提交
713 714 715
int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
716
	struct nfs_pageio_descriptor pgio;
717
	struct nfs_io_completion *ioc;
L
Linus Torvalds 已提交
718 719
	int err;

720 721
	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);

722
	ioc = nfs_io_completion_alloc(GFP_KERNEL);
723 724 725
	if (ioc)
		nfs_io_completion_init(ioc, nfs_io_completion_commit, inode);

726 727
	nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false,
				&nfs_async_write_completion_ops);
728
	pgio.pg_io_completion = ioc;
729
	err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
730
	pgio.pg_error = 0;
731
	nfs_pageio_complete(&pgio);
732
	nfs_io_completion_put(ioc);
733

734
	if (err < 0)
735 736
		goto out_err;
	err = pgio.pg_error;
737
	if (nfs_error_is_fatal(err))
738
		goto out_err;
739
	return 0;
740 741
out_err:
	return err;
L
Linus Torvalds 已提交
742 743 744 745 746
}

/*
 * Insert a write request into an inode
 */
747
static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
L
Linus Torvalds 已提交
748
{
749
	struct address_space *mapping = page_file_mapping(req->wb_page);
L
Linus Torvalds 已提交
750
	struct nfs_inode *nfsi = NFS_I(inode);
751

752 753
	WARN_ON_ONCE(req->wb_this_page != req);

754
	/* Lock the request! */
755
	nfs_lock_request(req);
756

757 758 759 760
	/*
	 * Swap-space should not get truncated. Hence no need to plug the race
	 * with invalidate/truncate.
	 */
761 762
	spin_lock(&mapping->private_lock);
	if (!nfs_have_writebacks(inode) &&
763 764
	    NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
		inode_inc_iversion_raw(inode);
765 766 767 768 769
	if (likely(!PageSwapCache(req->wb_page))) {
		set_bit(PG_MAPPED, &req->wb_flags);
		SetPagePrivate(req->wb_page);
		set_page_private(req->wb_page, (unsigned long)req);
	}
770
	spin_unlock(&mapping->private_lock);
771
	atomic_long_inc(&nfsi->nrequests);
772
	/* this a head request for a page group - mark it as having an
773 774 775
	 * extra reference so sub groups can follow suit.
	 * This flag also informs pgio layer when to bump nrequests when
	 * adding subrequests. */
776
	WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags));
777
	kref_get(&req->wb_kref);
L
Linus Torvalds 已提交
778 779 780
}

/*
781
 * Remove a write request from an inode
L
Linus Torvalds 已提交
782 783 784
 */
static void nfs_inode_remove_request(struct nfs_page *req)
{
785 786
	struct address_space *mapping = page_file_mapping(req->wb_page);
	struct inode *inode = mapping->host;
L
Linus Torvalds 已提交
787
	struct nfs_inode *nfsi = NFS_I(inode);
788
	struct nfs_page *head;
L
Linus Torvalds 已提交
789

790 791 792
	if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) {
		head = req->wb_head;

793
		spin_lock(&mapping->private_lock);
794
		if (likely(head->wb_page && !PageSwapCache(head->wb_page))) {
795 796 797 798
			set_page_private(head->wb_page, 0);
			ClearPagePrivate(head->wb_page);
			clear_bit(PG_MAPPED, &head->wb_flags);
		}
799
		spin_unlock(&mapping->private_lock);
800
	}
801

802
	if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) {
803
		nfs_release_request(req);
804 805
		atomic_long_dec(&nfsi->nrequests);
	}
L
Linus Torvalds 已提交
806 807
}

808
static void
F
Fred 已提交
809
nfs_mark_request_dirty(struct nfs_page *req)
810
{
811 812
	if (req->wb_page)
		__set_page_dirty_nobuffers(req->wb_page);
813 814
}

815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846
/*
 * nfs_page_search_commits_for_head_request_locked
 *
 * Search through commit lists on @inode for the head request for @page.
 * Must be called while holding the inode (which is cinfo) lock.
 *
 * Returns the head request if found, or NULL if not found.
 */
static struct nfs_page *
nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi,
						struct page *page)
{
	struct nfs_page *freq, *t;
	struct nfs_commit_info cinfo;
	struct inode *inode = &nfsi->vfs_inode;

	nfs_init_cinfo_from_inode(&cinfo, inode);

	/* search through pnfs commit lists */
	freq = pnfs_search_commit_reqs(inode, &cinfo, page);
	if (freq)
		return freq->wb_head;

	/* Linearly search the commit list for the correct request */
	list_for_each_entry_safe(freq, t, &cinfo.mds->list, wb_list) {
		if (freq->wb_page == page)
			return freq->wb_head;
	}

	return NULL;
}

847 848 849 850 851 852 853 854 855 856
/**
 * nfs_request_add_commit_list_locked - add request to a commit list
 * @req: pointer to a struct nfs_page
 * @dst: commit list head
 * @cinfo: holds list lock and accounting info
 *
 * This sets the PG_CLEAN bit, updates the cinfo count of
 * number of outstanding requests requiring a commit as well as
 * the MM page stats.
 *
857 858
 * The caller must hold NFS_I(cinfo->inode)->commit_mutex, and the
 * nfs_page lock.
859 860 861 862 863 864 865
 */
void
nfs_request_add_commit_list_locked(struct nfs_page *req, struct list_head *dst,
			    struct nfs_commit_info *cinfo)
{
	set_bit(PG_CLEAN, &req->wb_flags);
	nfs_list_add_request(req, dst);
866
	atomic_long_inc(&cinfo->mds->ncommit);
867 868 869
}
EXPORT_SYMBOL_GPL(nfs_request_add_commit_list_locked);

870 871 872
/**
 * nfs_request_add_commit_list - add request to a commit list
 * @req: pointer to a struct nfs_page
873
 * @cinfo: holds list lock and accounting info
874
 *
875
 * This sets the PG_CLEAN bit, updates the cinfo count of
876 877 878
 * number of outstanding requests requiring a commit as well as
 * the MM page stats.
 *
879
 * The caller must _not_ hold the cinfo->lock, but must be
880
 * holding the nfs_page lock.
L
Linus Torvalds 已提交
881
 */
882
void
883
nfs_request_add_commit_list(struct nfs_page *req, struct nfs_commit_info *cinfo)
L
Linus Torvalds 已提交
884
{
885
	mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
886
	nfs_request_add_commit_list_locked(req, &cinfo->mds->list, cinfo);
887
	mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
888 889
	if (req->wb_page)
		nfs_mark_page_unstable(req->wb_page, cinfo);
L
Linus Torvalds 已提交
890
}
891 892 893 894 895
EXPORT_SYMBOL_GPL(nfs_request_add_commit_list);

/**
 * nfs_request_remove_commit_list - Remove request from a commit list
 * @req: pointer to a nfs_page
896
 * @cinfo: holds list lock and accounting info
897
 *
898
 * This clears the PG_CLEAN bit, and updates the cinfo's count of
899 900 901
 * number of outstanding requests requiring a commit
 * It does not update the MM page stats.
 *
902
 * The caller _must_ hold the cinfo->lock and the nfs_page lock.
903 904
 */
void
905 906
nfs_request_remove_commit_list(struct nfs_page *req,
			       struct nfs_commit_info *cinfo)
907 908 909 910
{
	if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags))
		return;
	nfs_list_remove_request(req);
911
	atomic_long_dec(&cinfo->mds->ncommit);
912 913 914
}
EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list);

915 916 917
static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
				      struct inode *inode)
{
918
	cinfo->inode = inode;
919 920
	cinfo->mds = &NFS_I(inode)->commit_info;
	cinfo->ds = pnfs_get_ds_info(inode);
921
	cinfo->dreq = NULL;
922
	cinfo->completion_ops = &nfs_commit_completion_ops;
923 924 925 926 927 928
}

void nfs_init_cinfo(struct nfs_commit_info *cinfo,
		    struct inode *inode,
		    struct nfs_direct_req *dreq)
{
929 930 931 932
	if (dreq)
		nfs_init_cinfo_from_dreq(cinfo, dreq);
	else
		nfs_init_cinfo_from_inode(cinfo, inode);
933 934
}
EXPORT_SYMBOL_GPL(nfs_init_cinfo);
935 936 937 938

/*
 * Add a request to the inode's commit list.
 */
939
void
940
nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
941
			struct nfs_commit_info *cinfo, u32 ds_commit_idx)
942
{
943
	if (pnfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx))
944
		return;
945
	nfs_request_add_commit_list(req, cinfo);
946
}
947

948 949 950
static void
nfs_clear_page_commit(struct page *page)
{
951
	dec_node_page_state(page, NR_UNSTABLE_NFS);
952 953
	dec_wb_stat(&inode_to_bdi(page_file_mapping(page)->host)->wb,
		    WB_RECLAIMABLE);
954 955
}

956
/* Called holding the request lock on @req */
957
static void
958 959
nfs_clear_request_commit(struct nfs_page *req)
{
960
	if (test_bit(PG_CLEAN, &req->wb_flags)) {
961 962
		struct nfs_open_context *ctx = nfs_req_openctx(req);
		struct inode *inode = d_inode(ctx->dentry);
963
		struct nfs_commit_info cinfo;
964

965
		nfs_init_cinfo_from_inode(&cinfo, inode);
966
		mutex_lock(&NFS_I(inode)->commit_mutex);
967 968
		if (!pnfs_clear_request_commit(req, &cinfo)) {
			nfs_request_remove_commit_list(req, &cinfo);
969
		}
970
		mutex_unlock(&NFS_I(inode)->commit_mutex);
971
		nfs_clear_page_commit(req->wb_page);
972 973 974
	}
}

975
int nfs_write_need_commit(struct nfs_pgio_header *hdr)
976
{
977
	if (hdr->verf.committed == NFS_DATA_SYNC)
978
		return hdr->lseg == NULL;
979
	return hdr->verf.committed != NFS_FILE_SYNC;
980 981
}

982 983 984 985 986
static void nfs_async_write_init(struct nfs_pgio_header *hdr)
{
	nfs_io_completion_get(hdr->io_completion);
}

987
static void nfs_write_completion(struct nfs_pgio_header *hdr)
988
{
989
	struct nfs_commit_info cinfo;
990 991 992 993
	unsigned long bytes = 0;

	if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
		goto out;
994
	nfs_init_cinfo_from_inode(&cinfo, hdr->inode);
995 996 997 998 999 1000 1001
	while (!list_empty(&hdr->pages)) {
		struct nfs_page *req = nfs_list_entry(hdr->pages.next);

		bytes += req->wb_bytes;
		nfs_list_remove_request(req);
		if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) &&
		    (hdr->good_bytes < bytes)) {
1002
			nfs_set_pageerror(page_file_mapping(req->wb_page));
1003
			trace_nfs_comp_error(req, hdr->error);
1004
			nfs_mapping_set_error(req->wb_page, hdr->error);
1005 1006
			goto remove_req;
		}
1007
		if (nfs_write_need_commit(hdr)) {
1008 1009
			/* Reset wb_nio, since the write was successful. */
			req->wb_nio = 0;
1010
			memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf));
1011
			nfs_mark_request_commit(req, hdr->lseg, &cinfo,
1012
				hdr->pgio_mirror_idx);
1013 1014 1015 1016 1017
			goto next;
		}
remove_req:
		nfs_inode_remove_request(req);
next:
1018
		nfs_end_page_writeback(req);
1019
		nfs_release_request(req);
1020 1021
	}
out:
1022
	nfs_io_completion_put(hdr->io_completion);
1023
	hdr->release(hdr);
1024
}
L
Linus Torvalds 已提交
1025

1026
unsigned long
1027
nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
1028
{
1029
	return atomic_long_read(&cinfo->mds->ncommit);
1030 1031
}

1032
/* NFS_I(cinfo->inode)->commit_mutex held by caller */
1033
int
1034 1035
nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
		     struct nfs_commit_info *cinfo, int max)
1036
{
1037
	struct nfs_page *req, *tmp;
1038 1039
	int ret = 0;

1040 1041
restart:
	list_for_each_entry_safe(req, tmp, src, wb_list) {
1042
		kref_get(&req->wb_kref);
1043 1044
		if (!nfs_lock_request(req)) {
			int status;
1045 1046 1047 1048 1049 1050 1051

			/* Prevent deadlock with nfs_lock_and_join_requests */
			if (!list_empty(dst)) {
				nfs_release_request(req);
				continue;
			}
			/* Ensure we make progress to prevent livelock */
1052 1053 1054 1055 1056 1057
			mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
			status = nfs_wait_on_request(req);
			nfs_release_request(req);
			mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
			if (status < 0)
				break;
1058
			goto restart;
1059
		}
1060
		nfs_request_remove_commit_list(req, cinfo);
1061
		clear_bit(PG_COMMIT_TO_DS, &req->wb_flags);
1062 1063
		nfs_list_add_request(req, dst);
		ret++;
1064
		if ((ret == max) && !cinfo->dreq)
1065
			break;
1066
		cond_resched();
1067 1068
	}
	return ret;
1069
}
1070
EXPORT_SYMBOL_GPL(nfs_scan_commit_list);
1071

L
Linus Torvalds 已提交
1072 1073 1074
/*
 * nfs_scan_commit - Scan an inode for commit requests
 * @inode: NFS inode to scan
1075 1076
 * @dst: mds destination list
 * @cinfo: mds and ds lists of reqs ready to commit
L
Linus Torvalds 已提交
1077 1078 1079 1080
 *
 * Moves requests from the inode's 'commit' request list.
 * The requests are *not* checked to ensure that they form a contiguous set.
 */
1081
int
1082 1083
nfs_scan_commit(struct inode *inode, struct list_head *dst,
		struct nfs_commit_info *cinfo)
L
Linus Torvalds 已提交
1084
{
1085
	int ret = 0;
1086

1087 1088
	if (!atomic_long_read(&cinfo->mds->ncommit))
		return 0;
1089
	mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
1090
	if (atomic_long_read(&cinfo->mds->ncommit) > 0) {
1091
		const int max = INT_MAX;
1092

1093 1094 1095
		ret = nfs_scan_commit_list(&cinfo->mds->list, dst,
					   cinfo, max);
		ret += pnfs_scan_commit_lists(inode, cinfo, max - ret);
1096
	}
1097
	mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
1098
	return ret;
L
Linus Torvalds 已提交
1099
}
1100

L
Linus Torvalds 已提交
1101
/*
1102 1103
 * Search for an existing write request, and attempt to update
 * it to reflect a new dirty region on a given page.
L
Linus Torvalds 已提交
1104
 *
1105 1106
 * If the attempt fails, then the existing request is flushed out
 * to disk.
L
Linus Torvalds 已提交
1107
 */
1108 1109 1110 1111
static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
		struct page *page,
		unsigned int offset,
		unsigned int bytes)
L
Linus Torvalds 已提交
1112
{
1113 1114 1115 1116 1117
	struct nfs_page *req;
	unsigned int rqend;
	unsigned int end;
	int error;

L
Linus Torvalds 已提交
1118 1119
	end = offset + bytes;

1120 1121 1122
	req = nfs_lock_and_join_requests(page);
	if (IS_ERR_OR_NULL(req))
		return req;
L
Linus Torvalds 已提交
1123

1124 1125 1126 1127 1128 1129 1130 1131 1132
	rqend = req->wb_offset + req->wb_bytes;
	/*
	 * Tell the caller to flush out the request if
	 * the offsets are non-contiguous.
	 * Note: nfs_flush_incompatible() will already
	 * have flushed out requests having wrong owners.
	 */
	if (offset > rqend || end < req->wb_offset)
		goto out_flushme;
L
Linus Torvalds 已提交
1133 1134 1135 1136 1137 1138 1139 1140

	/* Okay, the request matches. Update the region */
	if (offset < req->wb_offset) {
		req->wb_offset = offset;
		req->wb_pgbase = offset;
	}
	if (end > rqend)
		req->wb_bytes = end - req->wb_offset;
1141 1142
	else
		req->wb_bytes = rqend - req->wb_offset;
1143
	req->wb_nio = 0;
1144 1145
	return req;
out_flushme:
1146 1147 1148 1149 1150 1151 1152
	/*
	 * Note: we mark the request dirty here because
	 * nfs_lock_and_join_requests() cannot preserve
	 * commit flags, so we have to replay the write.
	 */
	nfs_mark_request_dirty(req);
	nfs_unlock_and_release_request(req);
1153
	error = nfs_wb_page(inode, page);
1154
	return (error < 0) ? ERR_PTR(error) : NULL;
1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166
}

/*
 * Try to update an existing write request, or create one if there is none.
 *
 * Note: Should always be called with the Page Lock held to prevent races
 * if we have to add a new request. Also assumes that the caller has
 * already called nfs_flush_incompatible() if necessary.
 */
static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
		struct page *page, unsigned int offset, unsigned int bytes)
{
1167
	struct inode *inode = page_file_mapping(page)->host;
1168
	struct nfs_page	*req;
L
Linus Torvalds 已提交
1169

1170 1171 1172
	req = nfs_try_to_update_request(inode, page, offset, bytes);
	if (req != NULL)
		goto out;
1173
	req = nfs_create_request(ctx, page, offset, bytes);
1174 1175
	if (IS_ERR(req))
		goto out;
1176
	nfs_inode_add_request(inode, req);
1177
out:
1178
	return req;
L
Linus Torvalds 已提交
1179 1180
}

1181 1182 1183 1184 1185 1186 1187 1188 1189 1190
static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
		unsigned int offset, unsigned int count)
{
	struct nfs_page	*req;

	req = nfs_setup_write_request(ctx, page, offset, count);
	if (IS_ERR(req))
		return PTR_ERR(req);
	/* Update file length */
	nfs_grow_file(page, offset, count);
1191
	nfs_mark_uptodate(req);
1192
	nfs_mark_request_dirty(req);
1193
	nfs_unlock_and_release_request(req);
1194 1195 1196
	return 0;
}

L
Linus Torvalds 已提交
1197 1198
int nfs_flush_incompatible(struct file *file, struct page *page)
{
1199
	struct nfs_open_context *ctx = nfs_file_open_context(file);
1200
	struct nfs_lock_context *l_ctx;
1201
	struct file_lock_context *flctx = file_inode(file)->i_flctx;
L
Linus Torvalds 已提交
1202
	struct nfs_page	*req;
1203
	int do_flush, status;
L
Linus Torvalds 已提交
1204 1205 1206 1207 1208 1209 1210 1211
	/*
	 * Look for a request corresponding to this page. If there
	 * is one, and it belongs to another file, we flush it out
	 * before we try to copy anything into the page. Do this
	 * due to the lack of an ACCESS-type call in NFSv2.
	 * Also do the same if we find a request from an existing
	 * dropped page.
	 */
1212
	do {
1213
		req = nfs_page_find_head_request(page);
1214 1215
		if (req == NULL)
			return 0;
1216
		l_ctx = req->wb_lock_context;
1217
		do_flush = req->wb_page != page ||
1218
			!nfs_match_open_context(nfs_req_openctx(req), ctx);
1219 1220 1221
		if (l_ctx && flctx &&
		    !(list_empty_careful(&flctx->flc_posix) &&
		      list_empty_careful(&flctx->flc_flock))) {
1222
			do_flush |= l_ctx->lockowner != current->files;
1223
		}
L
Linus Torvalds 已提交
1224
		nfs_release_request(req);
1225 1226
		if (!do_flush)
			return 0;
1227
		status = nfs_wb_page(page_file_mapping(page)->host, page);
1228 1229
	} while (status == 0);
	return status;
L
Linus Torvalds 已提交
1230 1231
}

1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246
/*
 * Avoid buffered writes when a open context credential's key would
 * expire soon.
 *
 * Returns -EACCES if the key will expire within RPC_KEY_EXPIRE_FAIL.
 *
 * Return 0 and set a credential flag which triggers the inode to flush
 * and performs  NFS_FILE_SYNC writes if the key will expired within
 * RPC_KEY_EXPIRE_TIMEO.
 */
int
nfs_key_timeout_notify(struct file *filp, struct inode *inode)
{
	struct nfs_open_context *ctx = nfs_file_open_context(filp);

1247 1248 1249 1250 1251
	if (nfs_ctx_key_to_expire(ctx, inode) &&
	    !ctx->ll_cred)
		/* Already expired! */
		return -EACCES;
	return 0;
1252 1253 1254 1255 1256
}

/*
 * Test if the open context credential key is marked to expire soon.
 */
1257
bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx, struct inode *inode)
1258
{
1259
	struct rpc_auth *auth = NFS_SERVER(inode)->client->cl_auth;
1260 1261
	struct rpc_cred *cred = ctx->ll_cred;
	struct auth_cred acred = {
1262
		.cred = ctx->cred,
1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276
	};

	if (cred && !cred->cr_ops->crmatch(&acred, cred, 0)) {
		put_rpccred(cred);
		ctx->ll_cred = NULL;
		cred = NULL;
	}
	if (!cred)
		cred = auth->au_ops->lookup_cred(auth, &acred, 0);
	if (!cred || IS_ERR(cred))
		return true;
	ctx->ll_cred = cred;
	return !!(cred->cr_ops->crkey_timeout &&
		  cred->cr_ops->crkey_timeout(cred));
1277 1278
}

1279 1280 1281 1282 1283
/*
 * If the page cache is marked as unsafe or invalid, then we can't rely on
 * the PageUptodate() flag. In this case, we will need to turn off
 * write optimisations that depend on the page contents being correct.
 */
1284
static bool nfs_write_pageuptodate(struct page *page, struct inode *inode)
1285
{
1286 1287
	struct nfs_inode *nfsi = NFS_I(inode);

1288 1289
	if (nfs_have_delegated_attributes(inode))
		goto out;
1290
	if (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
1291
		return false;
1292
	smp_rmb();
1293
	if (test_bit(NFS_INO_INVALIDATING, &nfsi->flags))
1294 1295
		return false;
out:
1296 1297
	if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
		return false;
1298
	return PageUptodate(page) != 0;
1299 1300
}

1301 1302 1303 1304 1305 1306 1307
static bool
is_whole_file_wrlock(struct file_lock *fl)
{
	return fl->fl_start == 0 && fl->fl_end == OFFSET_MAX &&
			fl->fl_type == F_WRLCK;
}

1308 1309 1310 1311 1312
/* If we know the page is up to date, and we're not using byte range locks (or
 * if we have the whole file locked for writing), it may be more efficient to
 * extend the write to cover the entire page in order to avoid fragmentation
 * inefficiencies.
 *
1313 1314
 * If the file is opened for synchronous writes then we can just skip the rest
 * of the checks.
1315 1316 1317
 */
static int nfs_can_extend_write(struct file *file, struct page *page, struct inode *inode)
{
1318 1319 1320 1321
	int ret;
	struct file_lock_context *flctx = inode->i_flctx;
	struct file_lock *fl;

1322 1323
	if (file->f_flags & O_DSYNC)
		return 0;
1324 1325
	if (!nfs_write_pageuptodate(page, inode))
		return 0;
1326 1327
	if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
		return 1;
1328 1329
	if (!flctx || (list_empty_careful(&flctx->flc_flock) &&
		       list_empty_careful(&flctx->flc_posix)))
1330
		return 1;
1331 1332 1333

	/* Check to see if there are whole file write locks */
	ret = 0;
1334
	spin_lock(&flctx->flc_lock);
1335 1336 1337 1338 1339 1340
	if (!list_empty(&flctx->flc_posix)) {
		fl = list_first_entry(&flctx->flc_posix, struct file_lock,
					fl_list);
		if (is_whole_file_wrlock(fl))
			ret = 1;
	} else if (!list_empty(&flctx->flc_flock)) {
1341 1342 1343 1344 1345
		fl = list_first_entry(&flctx->flc_flock, struct file_lock,
					fl_list);
		if (fl->fl_type == F_WRLCK)
			ret = 1;
	}
1346
	spin_unlock(&flctx->flc_lock);
1347
	return ret;
1348 1349
}

L
Linus Torvalds 已提交
1350 1351 1352 1353 1354 1355 1356 1357 1358
/*
 * Update and possibly write a cached page of an NFS file.
 *
 * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
 * things with a page scheduled for an RPC call (e.g. invalidate it).
 */
int nfs_updatepage(struct file *file, struct page *page,
		unsigned int offset, unsigned int count)
{
1359
	struct nfs_open_context *ctx = nfs_file_open_context(file);
1360 1361
	struct address_space *mapping = page_file_mapping(page);
	struct inode	*inode = mapping->host;
L
Linus Torvalds 已提交
1362 1363
	int		status = 0;

1364 1365
	nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);

1366 1367
	dprintk("NFS:       nfs_updatepage(%pD2 %d@%lld)\n",
		file, count, (long long)(page_file_offset(page) + offset));
L
Linus Torvalds 已提交
1368

1369 1370 1371
	if (!count)
		goto out;

1372
	if (nfs_can_extend_write(file, page, inode)) {
1373
		count = max(count + offset, nfs_page_length(page));
L
Linus Torvalds 已提交
1374 1375 1376
		offset = 0;
	}

1377
	status = nfs_writepage_setup(ctx, page, offset, count);
1378
	if (status < 0)
1379
		nfs_set_pageerror(mapping);
1380 1381
	else
		__set_page_dirty_nobuffers(page);
1382
out:
1383
	dprintk("NFS:       nfs_updatepage returns %d (isize %lld)\n",
L
Linus Torvalds 已提交
1384 1385 1386 1387
			status, (long long)i_size_read(inode));
	return status;
}

1388
static int flush_task_priority(int how)
L
Linus Torvalds 已提交
1389 1390 1391 1392 1393 1394 1395 1396 1397 1398
{
	switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) {
		case FLUSH_HIGHPRI:
			return RPC_PRIORITY_HIGH;
		case FLUSH_LOWPRI:
			return RPC_PRIORITY_LOW;
	}
	return RPC_PRIORITY_NORMAL;
}

1399 1400
static void nfs_initiate_write(struct nfs_pgio_header *hdr,
			       struct rpc_message *msg,
1401
			       const struct nfs_rpc_ops *rpc_ops,
1402
			       struct rpc_task_setup *task_setup_data, int how)
L
Linus Torvalds 已提交
1403
{
1404
	int priority = flush_task_priority(how);
1405

1406
	task_setup_data->priority = priority;
1407
	rpc_ops->write_setup(hdr, msg, &task_setup_data->rpc_client);
1408 1409
	trace_nfs_initiate_write(hdr->inode, hdr->io_start, hdr->good_bytes,
				 hdr->args.stable);
1410 1411
}

F
Fred 已提交
1412 1413 1414 1415 1416 1417
/* If a nfs_flush_* function fails, it should remove reqs from @head and
 * call this on each, which will prepare them to be retried on next
 * writeback using standard nfs.
 */
static void nfs_redirty_request(struct nfs_page *req)
{
1418 1419
	/* Bump the transmission count */
	req->wb_nio++;
F
Fred 已提交
1420
	nfs_mark_request_dirty(req);
1421
	set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags);
1422
	nfs_end_page_writeback(req);
1423
	nfs_release_request(req);
F
Fred 已提交
1424 1425
}

1426
static void nfs_async_write_error(struct list_head *head, int error)
1427 1428 1429 1430 1431 1432
{
	struct nfs_page	*req;

	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
1433 1434 1435 1436
		if (nfs_error_is_fatal(error))
			nfs_write_error(req, error);
		else
			nfs_redirty_request(req);
1437 1438 1439
	}
}

1440 1441
static void nfs_async_write_reschedule_io(struct nfs_pgio_header *hdr)
{
1442
	nfs_async_write_error(&hdr->pages, 0);
1443 1444
	filemap_fdatawrite_range(hdr->inode->i_mapping, hdr->args.offset,
			hdr->args.offset + hdr->args.count - 1);
1445 1446
}

1447
static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
1448
	.init_hdr = nfs_async_write_init,
1449 1450
	.error_cleanup = nfs_async_write_error,
	.completion = nfs_write_completion,
1451
	.reschedule_io = nfs_async_write_reschedule_io,
1452 1453
};

1454
void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
1455
			       struct inode *inode, int ioflags, bool force_mds,
1456
			       const struct nfs_pgio_completion_ops *compl_ops)
L
Linus Torvalds 已提交
1457
{
1458
	struct nfs_server *server = NFS_SERVER(inode);
1459
	const struct nfs_pageio_ops *pg_ops = &nfs_pgio_rw_ops;
1460 1461 1462 1463 1464

#ifdef CONFIG_NFS_V4_1
	if (server->pnfs_curr_ld && !force_mds)
		pg_ops = server->pnfs_curr_ld->pg_write_ops;
#endif
1465
	nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_write_ops,
1466
			server->wsize, ioflags);
1467
}
1468
EXPORT_SYMBOL_GPL(nfs_pageio_init_write);
L
Linus Torvalds 已提交
1469

1470 1471
void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
{
1472 1473
	struct nfs_pgio_mirror *mirror;

1474 1475 1476
	if (pgio->pg_ops && pgio->pg_ops->pg_cleanup)
		pgio->pg_ops->pg_cleanup(pgio);

1477
	pgio->pg_ops = &nfs_pgio_rw_ops;
1478 1479 1480 1481 1482

	nfs_pageio_stop_mirroring(pgio);

	mirror = &pgio->pg_mirrors[0];
	mirror->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize;
1483
}
1484
EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);
1485

L
Linus Torvalds 已提交
1486

1487 1488 1489 1490 1491 1492 1493
void nfs_commit_prepare(struct rpc_task *task, void *calldata)
{
	struct nfs_commit_data *data = calldata;

	NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
}

1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504
/*
 * Special version of should_remove_suid() that ignores capabilities.
 */
static int nfs_should_remove_suid(const struct inode *inode)
{
	umode_t mode = inode->i_mode;
	int kill = 0;

	/* suid always must be killed */
	if (unlikely(mode & S_ISUID))
		kill = ATTR_KILL_SUID;
1505

1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517
	/*
	 * sgid without any exec bits is just a mandatory locking mark; leave
	 * it alone.  If some exec bits are set, it's a real sgid; kill it.
	 */
	if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
		kill |= ATTR_KILL_SGID;

	if (unlikely(kill && S_ISREG(mode)))
		return kill;

	return 0;
}
1518

1519 1520 1521 1522 1523
static void nfs_writeback_check_extend(struct nfs_pgio_header *hdr,
		struct nfs_fattr *fattr)
{
	struct nfs_pgio_args *argp = &hdr->args;
	struct nfs_pgio_res *resp = &hdr->res;
1524
	u64 size = argp->offset + resp->count;
1525 1526

	if (!(fattr->valid & NFS_ATTR_FATTR_SIZE))
1527 1528 1529
		fattr->size = size;
	if (nfs_size_to_loff_t(fattr->size) < i_size_read(hdr->inode)) {
		fattr->valid &= ~NFS_ATTR_FATTR_SIZE;
1530
		return;
1531 1532
	}
	if (size != fattr->size)
1533 1534 1535
		return;
	/* Set attribute barrier */
	nfs_fattr_set_barrier(fattr);
1536 1537
	/* ...and update size */
	fattr->valid |= NFS_ATTR_FATTR_SIZE;
1538 1539 1540 1541
}

void nfs_writeback_update_inode(struct nfs_pgio_header *hdr)
{
1542
	struct nfs_fattr *fattr = &hdr->fattr;
1543 1544 1545 1546 1547 1548 1549 1550 1551
	struct inode *inode = hdr->inode;

	spin_lock(&inode->i_lock);
	nfs_writeback_check_extend(hdr, fattr);
	nfs_post_op_update_inode_force_wcc_locked(inode, fattr);
	spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL_GPL(nfs_writeback_update_inode);

L
Linus Torvalds 已提交
1552 1553 1554
/*
 * This function is called when the WRITE call is complete.
 */
1555 1556
static int nfs_writeback_done(struct rpc_task *task,
			      struct nfs_pgio_header *hdr,
1557
			      struct inode *inode)
L
Linus Torvalds 已提交
1558
{
1559
	int status;
L
Linus Torvalds 已提交
1560

1561 1562 1563 1564 1565 1566 1567
	/*
	 * ->write_done will attempt to use post-op attributes to detect
	 * conflicting writes by other clients.  A strict interpretation
	 * of close-to-open would allow us to continue caching even if
	 * another writer had changed the file, but some applications
	 * depend on tighter cache coherency when writing.
	 */
1568
	status = NFS_PROTO(inode)->write_done(task, hdr);
1569
	if (status != 0)
1570
		return status;
1571

1572
	nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count);
1573 1574
	trace_nfs_writeback_done(inode, task->tk_status,
				 hdr->args.offset, hdr->res.verf);
1575

1576 1577
	if (hdr->res.verf->committed < hdr->args.stable &&
	    task->tk_status >= 0) {
L
Linus Torvalds 已提交
1578 1579 1580 1581 1582 1583 1584 1585 1586 1587
		/* We tried a write call, but the server did not
		 * commit data to stable storage even though we
		 * requested it.
		 * Note: There is a known bug in Tru64 < 5.0 in which
		 *	 the server reports NFS_DATA_SYNC, but performs
		 *	 NFS_FILE_SYNC. We therefore implement this checking
		 *	 as a dprintk() in order to avoid filling syslog.
		 */
		static unsigned long    complain;

1588
		/* Note this will print the MDS for a DS write */
L
Linus Torvalds 已提交
1589
		if (time_before(complain, jiffies)) {
1590
			dprintk("NFS:       faulty NFS server %s:"
L
Linus Torvalds 已提交
1591
				" (committed = %d) != (stable = %d)\n",
1592
				NFS_SERVER(inode)->nfs_client->cl_hostname,
1593
				hdr->res.verf->committed, hdr->args.stable);
L
Linus Torvalds 已提交
1594 1595 1596
			complain = jiffies + 300 * HZ;
		}
	}
1597 1598

	/* Deal with the suid/sgid bit corner case */
1599 1600 1601 1602 1603
	if (nfs_should_remove_suid(inode)) {
		spin_lock(&inode->i_lock);
		NFS_I(inode)->cache_validity |= NFS_INO_INVALID_OTHER;
		spin_unlock(&inode->i_lock);
	}
1604 1605 1606 1607 1608 1609
	return 0;
}

/*
 * This function is called when the WRITE call is complete.
 */
1610 1611
static void nfs_writeback_result(struct rpc_task *task,
				 struct nfs_pgio_header *hdr)
1612
{
1613 1614
	struct nfs_pgio_args	*argp = &hdr->args;
	struct nfs_pgio_res	*resp = &hdr->res;
1615 1616

	if (resp->count < argp->count) {
L
Linus Torvalds 已提交
1617 1618
		static unsigned long    complain;

1619
		/* This a short write! */
1620
		nfs_inc_stats(hdr->inode, NFSIOS_SHORTWRITE);
1621

L
Linus Torvalds 已提交
1622
		/* Has the server at least made some progress? */
1623 1624 1625 1626 1627 1628
		if (resp->count == 0) {
			if (time_before(complain, jiffies)) {
				printk(KERN_WARNING
				       "NFS: Server wrote zero bytes, expected %u.\n",
				       argp->count);
				complain = jiffies + 300 * HZ;
L
Linus Torvalds 已提交
1629
			}
1630
			nfs_set_pgio_error(hdr, -EIO, argp->offset);
1631
			task->tk_status = -EIO;
1632
			return;
L
Linus Torvalds 已提交
1633
		}
1634 1635 1636 1637 1638 1639 1640

		/* For non rpc-based layout drivers, retry-through-MDS */
		if (!task->tk_ops) {
			hdr->pnfs_error = -EAGAIN;
			return;
		}

1641 1642 1643
		/* Was this an NFSv2 write or an NFSv3 stable write? */
		if (resp->verf->committed != NFS_UNSTABLE) {
			/* Resend from where the server left off */
1644
			hdr->mds_offset += resp->count;
1645 1646 1647 1648 1649 1650 1651 1652
			argp->offset += resp->count;
			argp->pgbase += resp->count;
			argp->count -= resp->count;
		} else {
			/* Resend as a stable write in order to avoid
			 * headaches in the case of a server crash.
			 */
			argp->stable = NFS_FILE_SYNC;
L
Linus Torvalds 已提交
1653
		}
1654
		rpc_restart_call_prepare(task);
L
Linus Torvalds 已提交
1655 1656 1657
	}
}

1658
static int wait_on_commit(struct nfs_mds_commit_info *cinfo)
1659
{
1660 1661
	return wait_var_event_killable(&cinfo->rpcs_out,
				       !atomic_read(&cinfo->rpcs_out));
1662
}
1663

1664 1665 1666
static void nfs_commit_begin(struct nfs_mds_commit_info *cinfo)
{
	atomic_inc(&cinfo->rpcs_out);
1667 1668
}

1669
static void nfs_commit_end(struct nfs_mds_commit_info *cinfo)
1670
{
1671
	if (atomic_dec_and_test(&cinfo->rpcs_out))
1672
		wake_up_var(&cinfo->rpcs_out);
1673 1674
}

1675
void nfs_commitdata_release(struct nfs_commit_data *data)
L
Linus Torvalds 已提交
1676
{
1677 1678
	put_nfs_open_context(data->context);
	nfs_commit_free(data);
L
Linus Torvalds 已提交
1679
}
1680
EXPORT_SYMBOL_GPL(nfs_commitdata_release);
L
Linus Torvalds 已提交
1681

1682
int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data,
1683
			const struct nfs_rpc_ops *nfs_ops,
1684
			const struct rpc_call_ops *call_ops,
1685
			int how, int flags)
L
Linus Torvalds 已提交
1686
{
1687
	struct rpc_task *task;
1688
	int priority = flush_task_priority(how);
1689 1690 1691
	struct rpc_message msg = {
		.rpc_argp = &data->args,
		.rpc_resp = &data->res,
1692
		.rpc_cred = data->cred,
1693
	};
1694
	struct rpc_task_setup task_setup_data = {
1695
		.task = &data->task,
1696
		.rpc_client = clnt,
1697
		.rpc_message = &msg,
1698
		.callback_ops = call_ops,
1699
		.callback_data = data,
1700
		.workqueue = nfsiod_workqueue,
1701
		.flags = RPC_TASK_ASYNC | flags,
1702
		.priority = priority,
1703
	};
1704
	/* Set up the initial task struct.  */
1705
	nfs_ops->commit_setup(data, &msg, &task_setup_data.rpc_client);
1706
	trace_nfs_initiate_commit(data);
1707

1708
	dprintk("NFS: initiated commit call\n");
1709 1710 1711 1712 1713 1714 1715 1716 1717

	task = rpc_run_task(&task_setup_data);
	if (IS_ERR(task))
		return PTR_ERR(task);
	if (how & FLUSH_SYNC)
		rpc_wait_for_completion_task(task);
	rpc_put_task(task);
	return 0;
}
1718
EXPORT_SYMBOL_GPL(nfs_initiate_commit);
1719

1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731
static loff_t nfs_get_lwb(struct list_head *head)
{
	loff_t lwb = 0;
	struct nfs_page *req;

	list_for_each_entry(req, head, wb_list)
		if (lwb < (req_offset(req) + req->wb_bytes))
			lwb = req_offset(req) + req->wb_bytes;

	return lwb;
}

/*
 * Set up the argument/result storage required for the RPC call.
 */
void nfs_init_commit(struct nfs_commit_data *data,
		     struct list_head *head,
		     struct pnfs_layout_segment *lseg,
		     struct nfs_commit_info *cinfo)
{
	struct nfs_page *first = nfs_list_entry(head->next);
	struct nfs_open_context *ctx = nfs_req_openctx(first);
	struct inode *inode = d_inode(ctx->dentry);

	/* Set up the RPC argument and reply structs
	 * NB: take care not to mess about with data->commit et al. */

	list_splice_init(head, &data->pages);

	data->inode	  = inode;
	data->cred	  = ctx->cred;
	data->lseg	  = lseg; /* reference transferred */
	/* only set lwb for pnfs commit */
	if (lseg)
		data->lwb = nfs_get_lwb(&data->pages);
	data->mds_ops     = &nfs_commit_ops;
	data->completion_ops = cinfo->completion_ops;
	data->dreq	  = cinfo->dreq;

	data->args.fh     = NFS_FH(data->inode);
	/* Note: we always request a commit of the entire inode */
	data->args.offset = 0;
	data->args.count  = 0;
	data->context     = get_nfs_open_context(ctx);
	data->res.fattr   = &data->fattr;
	data->res.verf    = &data->verf;
	nfs_fattr_init(&data->fattr);
}
EXPORT_SYMBOL_GPL(nfs_init_commit);
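/*
 * The offset == 0 / count == 0 arguments set up above request a commit of
 * the entire file: in NFSv3 COMMIT semantics (RFC 1813) a zero count means
 * "everything from offset through end-of-file".
 */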

void nfs_retry_commit(struct list_head *page_list,
		      struct pnfs_layout_segment *lseg,
		      struct nfs_commit_info *cinfo,
		      u32 ds_commit_idx)
{
	struct nfs_page *req;

	while (!list_empty(page_list)) {
		req = nfs_list_entry(page_list->next);
		nfs_list_remove_request(req);
		nfs_mark_request_commit(req, lseg, cinfo, ds_commit_idx);
		if (!cinfo->dreq)
			nfs_clear_page_commit(req->wb_page);
		nfs_unlock_and_release_request(req);
	}
}
EXPORT_SYMBOL_GPL(nfs_retry_commit);

static void
nfs_commit_resched_write(struct nfs_commit_info *cinfo,
		struct nfs_page *req)
{
	__set_page_dirty_nobuffers(req->wb_page);
}

/*
 * Commit dirty pages
 */
static int
nfs_commit_list(struct inode *inode, struct list_head *head, int how,
		struct nfs_commit_info *cinfo)
{
	struct nfs_commit_data	*data;

	/* another commit raced with us */
	if (list_empty(head))
		return 0;

	data = nfs_commitdata_alloc(true);

	/* Set up the argument struct */
	nfs_init_commit(data, head, NULL, cinfo);
	atomic_inc(&cinfo->mds->rpcs_out);
	return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode),
				   data->mds_ops, how, 0);
}

/*
 * COMMIT call returned
 */
static void nfs_commit_done(struct rpc_task *task, void *calldata)
{
	struct nfs_commit_data	*data = calldata;

	dprintk("NFS: %5u nfs_commit_done (status %d)\n",
		task->tk_pid, task->tk_status);

	/* Call the NFS version-specific code */
	NFS_PROTO(data->inode)->commit_done(task, data);
	trace_nfs_commit_done(data);
}

static void nfs_commit_release_pages(struct nfs_commit_data *data)
{
	struct nfs_page	*req;
	int status = data->task.tk_status;
	struct nfs_commit_info cinfo;
	struct nfs_server *nfss;

	while (!list_empty(&data->pages)) {
		req = nfs_list_entry(data->pages.next);
		nfs_list_remove_request(req);
		if (req->wb_page)
			nfs_clear_page_commit(req->wb_page);

		dprintk("NFS:       commit (%s/%llu %d@%lld)",
			nfs_req_openctx(req)->dentry->d_sb->s_id,
			(unsigned long long)NFS_FILEID(d_inode(nfs_req_openctx(req)->dentry)),
			req->wb_bytes,
			(long long)req_offset(req));
		if (status < 0) {
			if (req->wb_page) {
				trace_nfs_commit_error(req, status);
				nfs_mapping_set_error(req->wb_page, status);
				nfs_inode_remove_request(req);
			}
			dprintk_cont(", error = %d\n", status);
			goto next;
		}

		/* Okay, COMMIT succeeded, apparently. Check the verifier
		 * returned by the server against all stored verfs. */
		if (!nfs_write_verifier_cmp(&req->wb_verf, &data->verf.verifier)) {
			/* We have a match */
			if (req->wb_page)
				nfs_inode_remove_request(req);
			dprintk_cont(" OK\n");
			goto next;
		}
		/* We have a mismatch. Write the page again */
		dprintk_cont(" mismatch\n");
		nfs_mark_request_dirty(req);
		set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags);
	next:
		nfs_unlock_and_release_request(req);
		/* Latency breaker */
		cond_resched();
	}
	nfss = NFS_SERVER(data->inode);
	if (atomic_long_read(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
		clear_bdi_congested(inode_to_bdi(data->inode), BLK_RW_ASYNC);

	nfs_init_cinfo(&cinfo, data->inode, data->dreq);
	nfs_commit_end(cinfo.mds);
}
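/*
 * The verifier comparison above matters because the write verifier changes
 * whenever the server reboots or otherwise loses its write cache: a
 * mismatch means data sent as unstable writes may have been discarded
 * before this COMMIT, so those requests are redirtied and resent instead
 * of being completed.
 */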

static void nfs_commit_release(void *calldata)
{
	struct nfs_commit_data *data = calldata;

	data->completion_ops->completion(data);
	nfs_commitdata_release(calldata);
}

static const struct rpc_call_ops nfs_commit_ops = {
	.rpc_call_prepare = nfs_commit_prepare,
	.rpc_call_done = nfs_commit_done,
	.rpc_release = nfs_commit_release,
};

static const struct nfs_commit_completion_ops nfs_commit_completion_ops = {
	.completion = nfs_commit_release_pages,
	.resched_write = nfs_commit_resched_write,
};

int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
			    int how, struct nfs_commit_info *cinfo)
{
	int status;

	status = pnfs_commit_list(inode, head, how, cinfo);
	if (status == PNFS_NOT_ATTEMPTED)
		status = nfs_commit_list(inode, head, how, cinfo);
	return status;
}
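/*
 * pnfs_commit_list() gives any pNFS layout driver first claim on the
 * commit; only when it returns PNFS_NOT_ATTEMPTED does the request list
 * fall back to nfs_commit_list() above and a COMMIT to the MDS.
 */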

static int __nfs_commit_inode(struct inode *inode, int how,
		struct writeback_control *wbc)
{
	LIST_HEAD(head);
	struct nfs_commit_info cinfo;
	int may_wait = how & FLUSH_SYNC;
	int ret, nscan;

	nfs_init_cinfo_from_inode(&cinfo, inode);
	nfs_commit_begin(cinfo.mds);
	for (;;) {
		ret = nscan = nfs_scan_commit(inode, &head, &cinfo);
		if (ret <= 0)
			break;
		ret = nfs_generic_commit_list(inode, &head, how, &cinfo);
		if (ret < 0)
			break;
		ret = 0;
		if (wbc && wbc->sync_mode == WB_SYNC_NONE) {
			if (nscan < wbc->nr_to_write)
				wbc->nr_to_write -= nscan;
			else
				wbc->nr_to_write = 0;
		}
		if (nscan < INT_MAX)
			break;
		cond_resched();
	}
	nfs_commit_end(cinfo.mds);
	if (ret || !may_wait)
		return ret;
	return wait_on_commit(cinfo.mds);
}
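/*
 * Loop note: nfs_scan_commit() appears to be capped at INT_MAX requests
 * per call, so a return of exactly INT_MAX means the scan may have been
 * truncated and more requests can remain queued; only then does the loop
 * above scan again.
 */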

int nfs_commit_inode(struct inode *inode, int how)
{
	return __nfs_commit_inode(inode, how, NULL);
}
EXPORT_SYMBOL_GPL(nfs_commit_inode);

int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	int flags = FLUSH_SYNC;
	int ret = 0;

	if (wbc->sync_mode == WB_SYNC_NONE) {
		/* no commits means nothing needs to be done */
		if (!atomic_long_read(&nfsi->commit_info.ncommit))
			goto check_requests_outstanding;

		/* Don't commit yet if this is a non-blocking flush and there
		 * are a lot of outstanding writes for this mapping.
		 */
		if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))
			goto out_mark_dirty;

		/* don't wait for the COMMIT response */
		flags = 0;
	}

	ret = __nfs_commit_inode(inode, flags, wbc);
	if (!ret) {
		if (flags & FLUSH_SYNC)
			return 0;
	} else if (atomic_long_read(&nfsi->commit_info.ncommit))
		goto out_mark_dirty;

check_requests_outstanding:
	if (!atomic_read(&nfsi->commit_info.rpcs_out))
		return ret;
out_mark_dirty:
	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
	return ret;
}
EXPORT_SYMBOL_GPL(nfs_write_inode);
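/*
 * If the commit failed with requests still queued, or commit RPCs are
 * still outstanding, the inode is re-marked I_DIRTY_DATASYNC above so a
 * later writeback pass calls ->write_inode() again.
 */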

/*
 * Wrapper for filemap_write_and_wait_range()
 *
 * Needed for pNFS in order to ensure data becomes visible to the
 * client.
 */
int nfs_filemap_write_and_wait_range(struct address_space *mapping,
		loff_t lstart, loff_t lend)
{
	int ret;

	ret = filemap_write_and_wait_range(mapping, lstart, lend);
	if (ret == 0)
		ret = pnfs_sync_inode(mapping->host, true);
	return ret;
}
EXPORT_SYMBOL_GPL(nfs_filemap_write_and_wait_range);

/*
 * flush the inode to disk.
 */
int nfs_wb_all(struct inode *inode)
{
	int ret;

	trace_nfs_writeback_inode_enter(inode);

	ret = filemap_write_and_wait(inode->i_mapping);
	if (ret)
		goto out;
	ret = nfs_commit_inode(inode, FLUSH_SYNC);
	if (ret < 0)
		goto out;
	pnfs_sync_inode(inode, true);
	ret = 0;

out:
	trace_nfs_writeback_inode_exit(inode, ret);
	return ret;
}
EXPORT_SYMBOL_GPL(nfs_wb_all);

int nfs_wb_page_cancel(struct inode *inode, struct page *page)
{
	struct nfs_page *req;
	int ret = 0;

	wait_on_page_writeback(page);

	/* blocking call to cancel all requests and join to a single (head)
	 * request */
	req = nfs_lock_and_join_requests(page);

	if (IS_ERR(req)) {
		ret = PTR_ERR(req);
	} else if (req) {
		/* all requests from this page have been cancelled by
		 * nfs_lock_and_join_requests, so just remove the head
		 * request from the inode / page_private pointer and
		 * release it */
		nfs_inode_remove_request(req);
		nfs_unlock_and_release_request(req);
	}

	return ret;
}

/*
 * Write back all requests on one page - we do this before reading it.
 */
int nfs_wb_page(struct inode *inode, struct page *page)
{
	loff_t range_start = page_file_offset(page);
	loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 0,
		.range_start = range_start,
		.range_end = range_end,
	};
	int ret;

	trace_nfs_writeback_page_enter(inode);

	for (;;) {
		wait_on_page_writeback(page);
		if (clear_page_dirty_for_io(page)) {
			ret = nfs_writepage_locked(page, &wbc);
			if (ret < 0)
				goto out_error;
			continue;
		}
		ret = 0;
		if (!PagePrivate(page))
			break;
		ret = nfs_commit_inode(inode, FLUSH_SYNC);
		if (ret < 0)
			goto out_error;
	}
out_error:
	trace_nfs_writeback_page_exit(inode, ret);
	return ret;
}
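/*
 * The loop above alternates writing dirty data and committing unstable
 * data, exiting only once the page is clean, not under writeback, and no
 * longer carries NFS requests (PagePrivate clear).
 */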

#ifdef CONFIG_MIGRATION
int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
		struct page *page, enum migrate_mode mode)
{
	/*
	 * If PagePrivate is set, then the page is currently associated with
	 * an in-progress read or write request. Don't try to migrate it.
	 *
	 * FIXME: we could do this in principle, but we'll need a way to ensure
	 *        that we can safely release the inode reference while holding
	 *        the page lock.
	 */
	if (PagePrivate(page))
		return -EBUSY;

	if (!nfs_fscache_release_page(page, GFP_KERNEL))
		return -EBUSY;

	return migrate_page(mapping, newpage, page, mode);
}
#endif

int __init nfs_init_writepagecache(void)
{
	nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
					     sizeof(struct nfs_pgio_header),
					     0, SLAB_HWCACHE_ALIGN,
					     NULL);
	if (nfs_wdata_cachep == NULL)
		return -ENOMEM;

	nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE,
						     nfs_wdata_cachep);
	if (nfs_wdata_mempool == NULL)
		goto out_destroy_write_cache;

	nfs_cdata_cachep = kmem_cache_create("nfs_commit_data",
					     sizeof(struct nfs_commit_data),
					     0, SLAB_HWCACHE_ALIGN,
					     NULL);
	if (nfs_cdata_cachep == NULL)
		goto out_destroy_write_mempool;

	nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT,
						      nfs_cdata_cachep);
	if (nfs_commit_mempool == NULL)
		goto out_destroy_commit_cache;

	/*
	 * NFS congestion size, scale with available memory.
	 *
	 *  64MB:    8192k
	 * 128MB:   11585k
	 * 256MB:   16384k
	 * 512MB:   23170k
	 *   1GB:   32768k
	 *   2GB:   46340k
	 *   4GB:   65536k
	 *   8GB:   92681k
	 *  16GB:  131072k
	 *
	 * This allows larger machines to have larger/more transfers.
	 * Limit the default to 256M
	 */
	nfs_congestion_kb = (16*int_sqrt(totalram_pages())) << (PAGE_SHIFT-10);
	if (nfs_congestion_kb > 256*1024)
		nfs_congestion_kb = 256*1024;
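	/*
	 * Worked example (assuming 4 KiB pages, PAGE_SHIFT == 12): with
	 * 1 GiB of RAM, totalram_pages() == 262144, int_sqrt() gives 512,
	 * so (16 * 512) << (12 - 10) == 32768k, matching the table above.
	 */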

	return 0;

out_destroy_commit_cache:
	kmem_cache_destroy(nfs_cdata_cachep);
out_destroy_write_mempool:
	mempool_destroy(nfs_wdata_mempool);
out_destroy_write_cache:
	kmem_cache_destroy(nfs_wdata_cachep);
	return -ENOMEM;
}

void nfs_destroy_writepagecache(void)
{
	mempool_destroy(nfs_commit_mempool);
	kmem_cache_destroy(nfs_cdata_cachep);
	mempool_destroy(nfs_wdata_mempool);
	kmem_cache_destroy(nfs_wdata_cachep);
}
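/*
 * Teardown runs in reverse order of creation, and each mempool is
 * destroyed before the kmem cache that backs it.
 */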

static const struct nfs_rw_ops nfs_rw_write_ops = {
	.rw_alloc_header	= nfs_writehdr_alloc,
	.rw_free_header		= nfs_writehdr_free,
	.rw_done		= nfs_writeback_done,
	.rw_result		= nfs_writeback_result,
	.rw_initiate		= nfs_initiate_write,
};