write.c 51.6 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3
/*
 * linux/fs/nfs/write.c
 *
 * Write file data over NFS.
 *
 * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
 */

#include <linux/types.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/writeback.h>
15
#include <linux/swap.h>
16
#include <linux/migrate.h>
L
Linus Torvalds 已提交
17 18 19 20 21

#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/nfs_page.h>
22
#include <linux/backing-dev.h>
23
#include <linux/export.h>
24

L
Linus Torvalds 已提交
25 26 27
#include <asm/uaccess.h>

#include "delegation.h"
28
#include "internal.h"
C
Chuck Lever 已提交
29
#include "iostat.h"
30
#include "nfs4_fs.h"
31
#include "fscache.h"
32
#include "pnfs.h"
L
Linus Torvalds 已提交
33

34 35
#include "nfstrace.h"

L
Linus Torvalds 已提交
36 37 38 39 40 41 42 43
/* Debug facility this file reports under. */
#define NFSDBG_FACILITY		NFSDBG_PAGECACHE

/*
 * Pool sizing constants.  They are not referenced in the visible part of
 * this file; presumably they are the minimum reserves of the write and
 * commit mempools - TODO confirm at the mempool creation site.
 */
#define MIN_POOL_WRITE		(32)
#define MIN_POOL_COMMIT		(4)

/*
 * Local function declarations
 */
44
static void nfs_redirty_request(struct nfs_page *req);
45
static const struct rpc_call_ops nfs_commit_ops;
46
static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
47
static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
48
static const struct nfs_rw_ops nfs_rw_write_ops;
49
static void nfs_clear_request_commit(struct nfs_page *req);
50 51
static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
				      struct inode *inode);
L
Linus Torvalds 已提交
52

53
static struct kmem_cache *nfs_wdata_cachep;
54
static mempool_t *nfs_wdata_mempool;
55
static struct kmem_cache *nfs_cdata_cachep;
L
Linus Torvalds 已提交
56 57
static mempool_t *nfs_commit_mempool;

58
struct nfs_commit_data *nfs_commitdata_alloc(void)
L
Linus Torvalds 已提交
59
{
60
	struct nfs_commit_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOIO);
61

L
Linus Torvalds 已提交
62 63 64 65 66 67
	if (p) {
		memset(p, 0, sizeof(*p));
		INIT_LIST_HEAD(&p->pages);
	}
	return p;
}
68
EXPORT_SYMBOL_GPL(nfs_commitdata_alloc);
L
Linus Torvalds 已提交
69

70
void nfs_commit_free(struct nfs_commit_data *p)
L
Linus Torvalds 已提交
71 72 73
{
	mempool_free(p, nfs_commit_mempool);
}
74
EXPORT_SYMBOL_GPL(nfs_commit_free);
L
Linus Torvalds 已提交
75

76
static struct nfs_pgio_header *nfs_writehdr_alloc(void)
77
{
78
	struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
79

80
	if (p)
81 82 83
		memset(p, 0, sizeof(*p));
	return p;
}
84

85
static void nfs_writehdr_free(struct nfs_pgio_header *hdr)
86
{
87
	mempool_free(hdr, nfs_wdata_mempool);
88
}
L
Linus Torvalds 已提交
89

90 91 92 93 94 95 96
/*
 * Record a write error on an open context.
 *
 * @ctx: open context to update
 * @error: value stored in ctx->error
 *
 * The error code is stored first and the NFS_CONTEXT_ERROR_WRITE flag is
 * set afterwards, with a write barrier between the two.
 */
static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
{
	ctx->error = error;
	/* order the ->error store before the flag update below */
	smp_wmb();
	set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
}

97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
/*
 * nfs_page_search_commits_for_head_request_locked
 *
 * Search through commit lists on @inode for the head request for @page.
 * Must be called while holding the inode (which is cinfo) lock.
 *
 * Returns the head request if found, or NULL if not found.
 */
static struct nfs_page *
nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi,
						struct page *page)
{
	struct nfs_page *freq, *t;
	struct nfs_commit_info cinfo;
	struct inode *inode = &nfsi->vfs_inode;

	nfs_init_cinfo_from_inode(&cinfo, inode);

	/* search through pnfs commit lists */
	freq = pnfs_search_commit_reqs(inode, &cinfo, page);
	if (freq)
		return freq->wb_head;

	/* Linearly search the commit list for the correct request */
	list_for_each_entry_safe(freq, t, &cinfo.mds->list, wb_list) {
		if (freq->wb_page == page)
			return freq->wb_head;
	}

	return NULL;
}

129 130 131 132 133 134 135
/*
 * nfs_page_find_head_request_locked - find head request associated with @page
 *
 * must be called while holding the inode lock.
 *
 * returns matching head request with reference held, or NULL if not found.
 */
136
static struct nfs_page *
137
nfs_page_find_head_request_locked(struct nfs_inode *nfsi, struct page *page)
138 139 140
{
	struct nfs_page *req = NULL;

141
	if (PagePrivate(page))
142
		req = (struct nfs_page *)page_private(page);
143 144 145
	else if (unlikely(PageSwapCache(page)))
		req = nfs_page_search_commits_for_head_request_locked(nfsi,
			page);
146

147 148
	if (req) {
		WARN_ON_ONCE(req->wb_head != req);
149
		kref_get(&req->wb_kref);
150
	}
151

152 153 154
	return req;
}

155 156 157 158 159 160
/*
 * nfs_page_find_head_request - find head request associated with @page
 *
 * returns matching head request with reference held, or NULL if not found.
 */
static struct nfs_page *nfs_page_find_head_request(struct page *page)
161
{
162
	struct inode *inode = page_file_mapping(page)->host;
163 164
	struct nfs_page *req = NULL;

165
	spin_lock(&inode->i_lock);
166
	req = nfs_page_find_head_request_locked(NFS_I(inode), page);
167
	spin_unlock(&inode->i_lock);
168 169 170
	return req;
}

L
Linus Torvalds 已提交
171 172 173
/* Adjust the file length if we're writing beyond the end */
static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
{
174
	struct inode *inode = page_file_mapping(page)->host;
175 176
	loff_t end, i_size;
	pgoff_t end_index;
L
Linus Torvalds 已提交
177

178 179 180
	spin_lock(&inode->i_lock);
	i_size = i_size_read(inode);
	end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
181
	if (i_size > 0 && page_file_index(page) < end_index)
182
		goto out;
183
	end = page_file_offset(page) + ((loff_t)offset+count);
L
Linus Torvalds 已提交
184
	if (i_size >= end)
185
		goto out;
L
Linus Torvalds 已提交
186
	i_size_write(inode, end);
187 188 189
	nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
out:
	spin_unlock(&inode->i_lock);
L
Linus Torvalds 已提交
190 191
}

192 193 194
/* A writeback failed: mark the page as bad, and invalidate the page cache */
static void nfs_set_pageerror(struct page *page)
{
195
	nfs_zap_mapping(page_file_mapping(page)->host, page_file_mapping(page));
196 197
}

198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243
/*
 * nfs_page_group_search_locked
 * @head - head request of page group
 * @page_offset - offset into page
 *
 * Walk the circular wb_this_page list starting at @head and look for a
 * request whose [wb_pgbase, wb_pgbase + wb_bytes) range contains
 * @page_offset.
 *
 * Returns a pointer to the first matching nfs request, or NULL if no
 * match is found.
 *
 * Must be called with the page group lock held
 */
static struct nfs_page *
nfs_page_group_search_locked(struct nfs_page *head, unsigned int page_offset)
{
	struct nfs_page *cur = head;

	WARN_ON_ONCE(head != head->wb_head);
	WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &head->wb_head->wb_flags));

	do {
		bool outside = page_offset < cur->wb_pgbase ||
			       page_offset >= (cur->wb_pgbase + cur->wb_bytes);

		if (!outside)
			return cur;

		cur = cur->wb_this_page;
	} while (cur != head);

	return NULL;
}

/*
 * nfs_page_group_covers_page
 * @req - any request of the page group (its wb_head is used for the search)
 *
 * Starting at offset 0, repeatedly find the group member containing the
 * current offset and advance past it, until either no member is found or
 * nfs_page_length() of the page has been reached.  The page group lock is
 * held for the duration of the walk.
 *
 * Return true if the page group covers the whole page,
 * returns false otherwise
 */
static bool nfs_page_group_covers_page(struct nfs_page *req)
{
	struct nfs_page *tmp;
	unsigned int pos = 0;
	unsigned int len = nfs_page_length(req->wb_page);

	nfs_page_group_lock(req, false);

	do {
		tmp = nfs_page_group_search_locked(req->wb_head, pos);
		if (tmp) {
			/* no way this should happen */
			WARN_ON_ONCE(tmp->wb_pgbase != pos);
			pos += tmp->wb_bytes - (pos - tmp->wb_pgbase);
		}
	} while (tmp && pos < len);

	nfs_page_group_unlock(req);
	WARN_ON_ONCE(pos > len);
	return pos == len;
}

L
Linus Torvalds 已提交
260 261 262
/* We can set the PG_uptodate flag if we see that a write request
 * covers the full page.
 */
263
static void nfs_mark_uptodate(struct nfs_page *req)
L
Linus Torvalds 已提交
264
{
265
	if (PageUptodate(req->wb_page))
L
Linus Torvalds 已提交
266
		return;
267
	if (!nfs_page_group_covers_page(req))
L
Linus Torvalds 已提交
268
		return;
269
	SetPageUptodate(req->wb_page);
L
Linus Torvalds 已提交
270 271 272 273 274
}

static int wb_priority(struct writeback_control *wbc)
{
	if (wbc->for_reclaim)
275
		return FLUSH_HIGHPRI | FLUSH_STABLE;
276
	if (wbc->for_kupdate || wbc->for_background)
277 278
		return FLUSH_LOWPRI | FLUSH_COND_STABLE;
	return FLUSH_COND_STABLE;
L
Linus Torvalds 已提交
279 280
}

281 282 283 284 285 286 287 288 289 290
/*
 * NFS congestion control
 *
 * nfs_congestion_kb is expressed in kilobytes.  The thresholds below are
 * compared against the per-server nfss->writeback counter:
 *   - ON:  nfs_congestion_kb scaled down by (PAGE_SHIFT - 10), i.e. the
 *          same amount expressed in pages;
 *   - OFF: the ON threshold minus a quarter of itself.
 */

int nfs_congestion_kb;

#define NFS_CONGESTION_ON_THRESH 	(nfs_congestion_kb >> (PAGE_SHIFT-10))
#define NFS_CONGESTION_OFF_THRESH	\
	(NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2))

291
static void nfs_set_page_writeback(struct page *page)
292
{
293
	struct nfs_server *nfss = NFS_SERVER(page_file_mapping(page)->host);
294 295
	int ret = test_set_page_writeback(page);

296
	WARN_ON_ONCE(ret != 0);
297

298 299 300 301
	if (atomic_long_inc_return(&nfss->writeback) >
			NFS_CONGESTION_ON_THRESH) {
		set_bdi_congested(&nfss->backing_dev_info,
					BLK_RW_ASYNC);
302 303 304
	}
}

305
static void nfs_end_page_writeback(struct nfs_page *req)
306
{
307
	struct inode *inode = page_file_mapping(req->wb_page)->host;
308 309
	struct nfs_server *nfss = NFS_SERVER(inode);

310 311 312 313
	if (!nfs_page_group_sync_on_bit(req, PG_WB_END))
		return;

	end_page_writeback(req->wb_page);
P
Peter Zijlstra 已提交
314
	if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
315
		clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
316 317
}

318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454

/* nfs_page_group_clear_bits
 *   @req - an nfs request
 * Clears the page group bits PG_TEARDOWN, PG_UNLOCKPAGE, PG_UPTODATE,
 * PG_WB_END and PG_REMOVE in @req->wb_flags, in that order.
 */
static void
nfs_page_group_clear_bits(struct nfs_page *req)
{
	static const int group_bits[] = {
		PG_TEARDOWN,
		PG_UNLOCKPAGE,
		PG_UPTODATE,
		PG_WB_END,
		PG_REMOVE,
	};
	unsigned int i;

	for (i = 0; i < sizeof(group_bits) / sizeof(group_bits[0]); i++)
		clear_bit(group_bits[i], &req->wb_flags);
}


/*
 * nfs_unroll_locks_and_wait -  unlock all newly locked reqs and wait on @req
 *
 * helper for nfs_lock_and_join_requests, used when one member of the page
 * group could not be locked
 *
 * @inode - inode associated with request page group, must be holding inode lock
 * @head  - head request of page group, must be holding head lock
 * @req   - request that couldn't lock and needs to wait on the req bit lock
 * @nonblock - if true, don't actually wait
 *
 * NOTE: the caller holds both the page_group bit lock and the inode spin
 *       lock; BOTH are released before returning, and so is the caller's
 *       reference on @head.
 *
 * returns 0 on success, < 0 on error (-EAGAIN when @nonblock is set).
 */
static int
nfs_unroll_locks_and_wait(struct inode *inode, struct nfs_page *head,
			  struct nfs_page *req, bool nonblock)
	__releases(&inode->i_lock)
{
	struct nfs_page *locked = head;
	int status;

	/* undo the request locks taken so far, stopping short of @req */
	while (locked != req) {
		nfs_unlock_request(locked);
		locked = locked->wb_this_page;
	}

	WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags));

	/* pin @req for the duration of the wait below */
	kref_get(&req->wb_kref);

	nfs_page_group_unlock(head);
	spin_unlock(&inode->i_lock);

	/* drop the ref taken by nfs_page_find_head_request_locked */
	nfs_release_request(head);

	status = nonblock ? -EAGAIN : nfs_wait_on_request(req);
	nfs_release_request(req);

	return status;
}

/*
 * nfs_destroy_unlinked_subrequests - destroy recently unlinked subrequests
 *
 * @destroy_list - request list (using wb_this_page) terminated by @old_head
 * @old_head - the old head of the list
 *
 * All subrequests must be locked and removed from all lists, so at this point
 * they are only "active" in this function, and possibly in nfs_wait_on_request
 * with a reference held by some other context.
 *
 * Each subrequest is turned into a one-element group of its own, unlocked,
 * and then either released (dropping one reference on @old_head and the
 * PG_INODE_REF reference on the subrequest) or, if PG_TEARDOWN is set,
 * freed directly.
 */
static void
nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
				 struct nfs_page *old_head)
{
	while (destroy_list) {
		struct nfs_page *subreq = destroy_list;

		/* advance first: subreq's links are rewritten below */
		destroy_list = (subreq->wb_this_page == old_head) ?
				   NULL : subreq->wb_this_page;

		WARN_ON_ONCE(old_head != subreq->wb_head);

		/* make sure old group is not used */
		subreq->wb_head = subreq;
		subreq->wb_this_page = subreq;

		/* subreq is now totally disconnected from page group or any
		 * write / commit lists. last chance to wake any waiters */
		nfs_unlock_request(subreq);

		if (!test_bit(PG_TEARDOWN, &subreq->wb_flags)) {
			/* release ref on old head request */
			nfs_release_request(old_head);

			nfs_page_group_clear_bits(subreq);

			/* release the PG_INODE_REF reference */
			if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags))
				nfs_release_request(subreq);
			else
				WARN_ON_ONCE(1);
		} else {
			WARN_ON_ONCE(test_bit(PG_CLEAN, &subreq->wb_flags));
			/* zombie requests have already released the last
			 * reference and were waiting on the rest of the
			 * group to complete. Since it's no longer part of a
			 * group, simply free the request */
			nfs_page_group_clear_bits(subreq);
			nfs_free_request(subreq);
		}
	}
}

/*
 * nfs_lock_and_join_requests - join all subreqs to the head req and return
 *                              a locked reference, cancelling any pending
 *                              operations for this page.
 *
 * @page - the page used to lookup the "page group" of nfs_page structures
 * @nonblock - if true, don't block waiting for request locks
 *
 * This function joins all sub requests to the head request by first
 * locking all requests in the group, cancelling any pending operations
 * and finally updating the head request to cover the whole range covered by
 * the (former) group.  All subrequests are removed from any write or commit
 * lists, unlinked from the group and destroyed.
 *
 * Returns a locked, referenced pointer to the head request - which after
 * this call is guaranteed to be the only request associated with the page.
 * Returns NULL if no requests are found for @page, or a ERR_PTR if an
 * error was encountered.
 */
static struct nfs_page *
nfs_lock_and_join_requests(struct page *page, bool nonblock)
455
{
456
	struct inode *inode = page_file_mapping(page)->host;
457 458 459
	struct nfs_page *head, *subreq;
	struct nfs_page *destroy_list = NULL;
	unsigned int total_bytes;
460 461
	int ret;

462 463 464 465 466
try_again:
	total_bytes = 0;

	WARN_ON_ONCE(destroy_list);

467
	spin_lock(&inode->i_lock);
468 469 470 471 472 473 474 475 476

	/*
	 * A reference is taken only on the head request which acts as a
	 * reference to the whole page group - the group will not be destroyed
	 * until the head reference is released.
	 */
	head = nfs_page_find_head_request_locked(NFS_I(inode), page);

	if (!head) {
477
		spin_unlock(&inode->i_lock);
478 479 480
		return NULL;
	}

481 482
	/* holding inode lock, so always make a non-blocking call to try the
	 * page group lock */
483
	ret = nfs_page_group_lock(head, true);
484 485
	if (ret < 0) {
		spin_unlock(&inode->i_lock);
486 487 488 489 490 491 492

		if (!nonblock && ret == -EAGAIN) {
			nfs_page_group_lock_wait(head);
			nfs_release_request(head);
			goto try_again;
		}

493
		nfs_release_request(head);
494
		return ERR_PTR(ret);
495
	}
496 497

	/* lock each request in the page group */
498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518
	subreq = head;
	do {
		/*
		 * Subrequests are always contiguous, non overlapping
		 * and in order. If not, it's a programming error.
		 */
		WARN_ON_ONCE(subreq->wb_offset !=
		     (head->wb_offset + total_bytes));

		/* keep track of how many bytes this group covers */
		total_bytes += subreq->wb_bytes;

		if (!nfs_lock_request(subreq)) {
			/* releases page group bit lock and
			 * inode spin lock and all references */
			ret = nfs_unroll_locks_and_wait(inode, head,
				subreq, nonblock);

			if (ret == 0)
				goto try_again;

519
			return ERR_PTR(ret);
520 521 522 523 524 525 526 527 528
		}

		subreq = subreq->wb_this_page;
	} while (subreq != head);

	/* Now that all requests are locked, make sure they aren't on any list.
	 * Commit list removal accounting is done after locks are dropped */
	subreq = head;
	do {
529
		nfs_clear_request_commit(subreq);
530 531 532 533 534 535 536 537 538 539 540 541
		subreq = subreq->wb_this_page;
	} while (subreq != head);

	/* unlink subrequests from head, destroy them later */
	if (head->wb_this_page != head) {
		/* destroy list will be terminated by head */
		destroy_list = head->wb_this_page;
		head->wb_this_page = head;

		/* change head request to cover whole range that
		 * the former page group covered */
		head->wb_bytes = total_bytes;
542
	}
543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558

	/*
	 * prepare head request to be added to new pgio descriptor
	 */
	nfs_page_group_clear_bits(head);

	/*
	 * some part of the group was still on the inode list - otherwise
	 * the group wouldn't be involved in async write.
	 * grab a reference for the head request, iff it needs one.
	 */
	if (!test_and_set_bit(PG_INODE_REF, &head->wb_flags))
		kref_get(&head->wb_kref);

	nfs_page_group_unlock(head);

559
	/* drop lock to clean uprequests on destroy list */
560
	spin_unlock(&inode->i_lock);
561 562 563 564 565 566

	nfs_destroy_unlinked_subrequests(destroy_list, head);

	/* still holds ref on head from nfs_page_find_head_request_locked
	 * and still has lock on head from lock loop */
	return head;
567 568 569 570 571 572 573
}

/*
 * Find an associated nfs write request, and prepare to flush it out
 * May return an error if the user signalled nfs_wait_on_request().
 */
static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
574
				struct page *page, bool nonblock)
575 576 577 578
{
	struct nfs_page *req;
	int ret = 0;

579
	req = nfs_lock_and_join_requests(page, nonblock);
580 581 582 583 584 585
	if (!req)
		goto out;
	ret = PTR_ERR(req);
	if (IS_ERR(req))
		goto out;

586 587
	nfs_set_page_writeback(page);
	WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags));
588

589
	ret = 0;
590 591
	if (!nfs_pageio_add_request(pgio, req)) {
		nfs_redirty_request(req);
592
		ret = pgio->pg_error;
593
	}
594 595
out:
	return ret;
596 597
}

T
Trond Myklebust 已提交
598
static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio)
L
Linus Torvalds 已提交
599
{
600
	struct inode *inode = page_file_mapping(page)->host;
601
	int ret;
L
Linus Torvalds 已提交
602

C
Chuck Lever 已提交
603 604 605
	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
	nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);

606
	nfs_pageio_cond_complete(pgio, page_file_index(page));
607
	ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE);
608 609 610 611 612
	if (ret == -EAGAIN) {
		redirty_page_for_writepage(wbc, page);
		ret = 0;
	}
	return ret;
T
Trond Myklebust 已提交
613
}
614

T
Trond Myklebust 已提交
615 616 617 618 619 620 621
/*
 * Write an mmapped page to the server.
 */
static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc)
{
	struct nfs_pageio_descriptor pgio;
	int err;
622

623 624
	nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc),
				false, &nfs_async_write_completion_ops);
T
Trond Myklebust 已提交
625 626 627 628 629 630 631
	err = nfs_do_writepage(page, wbc, &pgio);
	nfs_pageio_complete(&pgio);
	if (err < 0)
		return err;
	if (pgio.pg_error < 0)
		return pgio.pg_error;
	return 0;
632 633 634 635
}

/*
 * Write out a single locked page and unlock it, whatever the outcome.
 * Returns the result of nfs_writepage_locked().
 */
int nfs_writepage(struct page *page, struct writeback_control *wbc)
{
	int ret;

	ret = nfs_writepage_locked(page, wbc);
	unlock_page(page);
	return ret;
}

/*
 * Per-page callback handed to write_cache_pages() by nfs_writepages().
 * @data is the struct nfs_pageio_descriptor the page is queued on.
 * The page is unlocked before returning the nfs_do_writepage() result.
 */
static int nfs_writepages_callback(struct page *page, struct writeback_control *wbc, void *data)
{
	int ret;

	ret = nfs_do_writepage(page, wbc, data);
	unlock_page(page);
	return ret;
}

int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
655
	unsigned long *bitlock = &NFS_I(inode)->flags;
656
	struct nfs_pageio_descriptor pgio;
L
Linus Torvalds 已提交
657 658
	int err;

659
	/* Stop dirtying of new pages while we sync */
660
	err = wait_on_bit_lock_action(bitlock, NFS_INO_FLUSHING,
661 662 663 664
			nfs_wait_bit_killable, TASK_KILLABLE);
	if (err)
		goto out_err;

C
Chuck Lever 已提交
665 666
	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);

667 668
	nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false,
				&nfs_async_write_completion_ops);
T
Trond Myklebust 已提交
669
	err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
670
	nfs_pageio_complete(&pgio);
671 672

	clear_bit_unlock(NFS_INO_FLUSHING, bitlock);
673
	smp_mb__after_atomic();
674 675
	wake_up_bit(bitlock, NFS_INO_FLUSHING);

T
Trond Myklebust 已提交
676
	if (err < 0)
677 678 679 680
		goto out_err;
	err = pgio.pg_error;
	if (err < 0)
		goto out_err;
681
	return 0;
682 683
out_err:
	return err;
L
Linus Torvalds 已提交
684 685 686 687 688
}

/*
 * Insert a write request into an inode
 */
F
Fred Isaman 已提交
689
static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
L
Linus Torvalds 已提交
690 691
{
	struct nfs_inode *nfsi = NFS_I(inode);
692

693 694
	WARN_ON_ONCE(req->wb_this_page != req);

695
	/* Lock the request! */
696
	nfs_lock_request(req);
697 698

	spin_lock(&inode->i_lock);
699
	if (!nfsi->npages && NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
700
		inode->i_version++;
701 702 703 704 705 706 707 708 709
	/*
	 * Swap-space should not get truncated. Hence no need to plug the race
	 * with invalidate/truncate.
	 */
	if (likely(!PageSwapCache(req->wb_page))) {
		set_bit(PG_MAPPED, &req->wb_flags);
		SetPagePrivate(req->wb_page);
		set_page_private(req->wb_page, (unsigned long)req);
	}
L
Linus Torvalds 已提交
710
	nfsi->npages++;
711 712 713
	/* this a head request for a page group - mark it as having an
	 * extra reference so sub groups can follow suit */
	WARN_ON(test_and_set_bit(PG_INODE_REF, &req->wb_flags));
714
	kref_get(&req->wb_kref);
715
	spin_unlock(&inode->i_lock);
L
Linus Torvalds 已提交
716 717 718
}

/*
719
 * Remove a write request from an inode
L
Linus Torvalds 已提交
720 721 722
 */
static void nfs_inode_remove_request(struct nfs_page *req)
{
723
	struct inode *inode = req->wb_context->dentry->d_inode;
L
Linus Torvalds 已提交
724
	struct nfs_inode *nfsi = NFS_I(inode);
725
	struct nfs_page *head;
L
Linus Torvalds 已提交
726

727 728 729 730 731 732 733 734 735 736 737
	if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) {
		head = req->wb_head;

		spin_lock(&inode->i_lock);
		if (likely(!PageSwapCache(head->wb_page))) {
			set_page_private(head->wb_page, 0);
			ClearPagePrivate(head->wb_page);
			clear_bit(PG_MAPPED, &head->wb_flags);
		}
		nfsi->npages--;
		spin_unlock(&inode->i_lock);
738
	}
739 740 741

	if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags))
		nfs_release_request(req);
742 743
	else
		WARN_ON_ONCE(1);
L
Linus Torvalds 已提交
744 745
}

746
static void
F
Fred 已提交
747
nfs_mark_request_dirty(struct nfs_page *req)
748 749 750 751
{
	__set_page_dirty_nobuffers(req->wb_page);
}

B
Bryan Schumaker 已提交
752
#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
753 754 755
/**
 * nfs_request_add_commit_list - add request to a commit list
 * @req: pointer to a struct nfs_page
F
Fred Isaman 已提交
756 757
 * @dst: commit list head
 * @cinfo: holds list lock and accounting info
758
 *
F
Fred Isaman 已提交
759
 * This sets the PG_CLEAN bit, updates the cinfo count of
760 761 762
 * number of outstanding requests requiring a commit as well as
 * the MM page stats.
 *
F
Fred Isaman 已提交
763
 * The caller must _not_ hold the cinfo->lock, but must be
764
 * holding the nfs_page lock.
L
Linus Torvalds 已提交
765
 */
766
void
F
Fred Isaman 已提交
767 768
nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
			    struct nfs_commit_info *cinfo)
L
Linus Torvalds 已提交
769
{
770
	set_bit(PG_CLEAN, &(req)->wb_flags);
F
Fred Isaman 已提交
771 772 773 774
	spin_lock(cinfo->lock);
	nfs_list_add_request(req, dst);
	cinfo->mds->ncommit++;
	spin_unlock(cinfo->lock);
775 776
	if (!cinfo->dreq) {
		inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
777
		inc_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info,
778 779 780 781
			     BDI_RECLAIMABLE);
		__mark_inode_dirty(req->wb_context->dentry->d_inode,
				   I_DIRTY_DATASYNC);
	}
L
Linus Torvalds 已提交
782
}
783 784 785 786 787
EXPORT_SYMBOL_GPL(nfs_request_add_commit_list);

/**
 * nfs_request_remove_commit_list - Remove request from a commit list
 * @req: pointer to a nfs_page
F
Fred Isaman 已提交
788
 * @cinfo: holds list lock and accounting info
789
 *
F
Fred Isaman 已提交
790
 * This clears the PG_CLEAN bit, and updates the cinfo's count of
791 792 793
 * number of outstanding requests requiring a commit
 * It does not update the MM page stats.
 *
F
Fred Isaman 已提交
794
 * The caller _must_ hold the cinfo->lock and the nfs_page lock.
795 796
 */
void
F
Fred Isaman 已提交
797 798
nfs_request_remove_commit_list(struct nfs_page *req,
			       struct nfs_commit_info *cinfo)
799 800 801 802
{
	if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags))
		return;
	nfs_list_remove_request(req);
F
Fred Isaman 已提交
803
	cinfo->mds->ncommit--;
804 805 806
}
EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list);

F
Fred Isaman 已提交
807 808 809 810 811 812
static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
				      struct inode *inode)
{
	cinfo->lock = &inode->i_lock;
	cinfo->mds = &NFS_I(inode)->commit_info;
	cinfo->ds = pnfs_get_ds_info(inode);
F
Fred Isaman 已提交
813
	cinfo->dreq = NULL;
814
	cinfo->completion_ops = &nfs_commit_completion_ops;
F
Fred Isaman 已提交
815 816 817 818 819 820
}

/*
 * nfs_init_cinfo - initialise a commit info structure
 * @cinfo: structure to fill in
 * @inode: inode used when there is no direct request
 * @dreq: direct request, or NULL
 *
 * Initialises @cinfo from @dreq when one is given, otherwise from @inode.
 */
void nfs_init_cinfo(struct nfs_commit_info *cinfo,
		    struct inode *inode,
		    struct nfs_direct_req *dreq)
{
	if (dreq)
		nfs_init_cinfo_from_dreq(cinfo, dreq);
	else
		nfs_init_cinfo_from_inode(cinfo, inode);
}
EXPORT_SYMBOL_GPL(nfs_init_cinfo);
827 828 829 830

/*
 * Add a request to the inode's commit list.
 */
831
void
F
Fred Isaman 已提交
832 833
nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
			struct nfs_commit_info *cinfo)
834
{
F
Fred Isaman 已提交
835
	if (pnfs_mark_request_commit(req, lseg, cinfo))
836
		return;
F
Fred Isaman 已提交
837
	nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo);
838
}
839

F
Fred Isaman 已提交
840 841 842 843
static void
nfs_clear_page_commit(struct page *page)
{
	dec_zone_page_state(page, NR_UNSTABLE_NFS);
844
	dec_bdi_stat(page_file_mapping(page)->backing_dev_info, BDI_RECLAIMABLE);
F
Fred Isaman 已提交
845 846
}

847
/* Called holding inode (/cinfo) lock */
848
static void
849 850
nfs_clear_request_commit(struct nfs_page *req)
{
851 852
	if (test_bit(PG_CLEAN, &req->wb_flags)) {
		struct inode *inode = req->wb_context->dentry->d_inode;
F
Fred Isaman 已提交
853
		struct nfs_commit_info cinfo;
854

F
Fred Isaman 已提交
855 856 857
		nfs_init_cinfo_from_inode(&cinfo, inode);
		if (!pnfs_clear_request_commit(req, &cinfo)) {
			nfs_request_remove_commit_list(req, &cinfo);
858
		}
F
Fred Isaman 已提交
859
		nfs_clear_page_commit(req->wb_page);
860 861 862
	}
}

863
int nfs_write_need_commit(struct nfs_pgio_header *hdr)
864
{
865
	if (hdr->verf.committed == NFS_DATA_SYNC)
866
		return hdr->lseg == NULL;
867
	return hdr->verf.committed != NFS_FILE_SYNC;
868 869 870
}

#else
871 872 873 874 875 876 877 878 879 880 881
/*
 * Stub for builds with neither CONFIG_NFS_V3 nor CONFIG_NFS_V4:
 * leaves @cinfo untouched.
 */
static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
				      struct inode *inode)
{
}

/*
 * Stub for builds with neither CONFIG_NFS_V3 nor CONFIG_NFS_V4:
 * leaves @cinfo untouched.
 */
void nfs_init_cinfo(struct nfs_commit_info *cinfo,
		    struct inode *inode,
		    struct nfs_direct_req *dreq)
{
}

882
/*
 * Stub for builds with neither CONFIG_NFS_V3 nor CONFIG_NFS_V4:
 * does nothing.
 */
void
nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
			struct nfs_commit_info *cinfo)
{
}

888
/*
 * Stub for builds with neither CONFIG_NFS_V3 nor CONFIG_NFS_V4:
 * does nothing.
 */
static void
nfs_clear_request_commit(struct nfs_page *req)
{
}

893
/*
 * Stub for builds with neither CONFIG_NFS_V3 nor CONFIG_NFS_V4:
 * a commit is never needed, so this always returns 0.
 */
int nfs_write_need_commit(struct nfs_pgio_header *hdr)
{
	return 0;
}

898 899
#endif

900
static void nfs_write_completion(struct nfs_pgio_header *hdr)
901
{
F
Fred Isaman 已提交
902
	struct nfs_commit_info cinfo;
903 904 905 906
	unsigned long bytes = 0;

	if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
		goto out;
F
Fred Isaman 已提交
907
	nfs_init_cinfo_from_inode(&cinfo, hdr->inode);
908 909 910 911 912 913 914
	while (!list_empty(&hdr->pages)) {
		struct nfs_page *req = nfs_list_entry(hdr->pages.next);

		bytes += req->wb_bytes;
		nfs_list_remove_request(req);
		if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) &&
		    (hdr->good_bytes < bytes)) {
915
			nfs_set_pageerror(req->wb_page);
916 917 918
			nfs_context_set_write_error(req->wb_context, hdr->error);
			goto remove_req;
		}
919
		if (nfs_write_need_commit(hdr)) {
920
			memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf));
F
Fred Isaman 已提交
921
			nfs_mark_request_commit(req, hdr->lseg, &cinfo);
922 923 924 925 926
			goto next;
		}
remove_req:
		nfs_inode_remove_request(req);
next:
927
		nfs_unlock_request(req);
928
		nfs_end_page_writeback(req);
929
		nfs_release_request(req);
930 931 932
	}
out:
	hdr->release(hdr);
933
}
L
Linus Torvalds 已提交
934

B
Bryan Schumaker 已提交
935
#if  IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
936
unsigned long
F
Fred Isaman 已提交
937
nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
938
{
F
Fred Isaman 已提交
939
	return cinfo->mds->ncommit;
F
Fred Isaman 已提交
940 941
}

F
Fred Isaman 已提交
942
/* cinfo->lock held by caller */
943
int
F
Fred Isaman 已提交
944 945
nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
		     struct nfs_commit_info *cinfo, int max)
F
Fred Isaman 已提交
946 947 948 949 950
{
	struct nfs_page *req, *tmp;
	int ret = 0;

	list_for_each_entry_safe(req, tmp, src, wb_list) {
951 952
		if (!nfs_lock_request(req))
			continue;
953
		kref_get(&req->wb_kref);
F
Fred Isaman 已提交
954
		if (cond_resched_lock(cinfo->lock))
955
			list_safe_reset_next(req, tmp, wb_list);
F
Fred Isaman 已提交
956
		nfs_request_remove_commit_list(req, cinfo);
957 958
		nfs_list_add_request(req, dst);
		ret++;
959
		if ((ret == max) && !cinfo->dreq)
960
			break;
F
Fred Isaman 已提交
961 962
	}
	return ret;
963 964
}

L
Linus Torvalds 已提交
965 966 967
/*
 * nfs_scan_commit - Scan an inode for commit requests
 * @inode: NFS inode to scan
F
Fred Isaman 已提交
968 969
 * @dst: mds destination list
 * @cinfo: mds and ds lists of reqs ready to commit
L
Linus Torvalds 已提交
970 971 972 973
 *
 * Moves requests from the inode's 'commit' request list.
 * The requests are *not* checked to ensure that they form a contiguous set.
 */
974
int
F
Fred Isaman 已提交
975 976
nfs_scan_commit(struct inode *inode, struct list_head *dst,
		struct nfs_commit_info *cinfo)
L
Linus Torvalds 已提交
977
{
F
Fred Isaman 已提交
978
	int ret = 0;
979

F
Fred Isaman 已提交
980 981
	spin_lock(cinfo->lock);
	if (cinfo->mds->ncommit > 0) {
982
		const int max = INT_MAX;
F
Fred Isaman 已提交
983

F
Fred Isaman 已提交
984 985 986
		ret = nfs_scan_commit_list(&cinfo->mds->list, dst,
					   cinfo, max);
		ret += pnfs_scan_commit_lists(inode, cinfo, max - ret);
F
Fred Isaman 已提交
987
	}
F
Fred Isaman 已提交
988
	spin_unlock(cinfo->lock);
989
	return ret;
L
Linus Torvalds 已提交
990
}
F
Fred Isaman 已提交
991

992
#else
993
/*
 * Stub used when neither CONFIG_NFS_V3 nor CONFIG_NFS_V4 is enabled:
 * there is never anything to commit, so always report zero.
 */
unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
{
	return 0;
}

998 999
/*
 * Stub used when neither CONFIG_NFS_V3 nor CONFIG_NFS_V4 is enabled:
 * no requests are ever moved to @dst, so the count is always zero.
 */
int nfs_scan_commit(struct inode *inode, struct list_head *dst,
		    struct nfs_commit_info *cinfo)
{
	return 0;
}
L
Linus Torvalds 已提交
1003 1004 1005
#endif

/*
1006 1007
 * Search for an existing write request, and attempt to update
 * it to reflect a new dirty region on a given page.
L
Linus Torvalds 已提交
1008
 *
1009 1010
 * If the attempt fails, then the existing request is flushed out
 * to disk.
L
Linus Torvalds 已提交
1011
 */
1012 1013 1014 1015
static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
		struct page *page,
		unsigned int offset,
		unsigned int bytes)
L
Linus Torvalds 已提交
1016
{
1017 1018 1019 1020 1021 1022 1023
	struct nfs_page *req;
	unsigned int rqend;
	unsigned int end;
	int error;

	if (!PagePrivate(page))
		return NULL;
L
Linus Torvalds 已提交
1024 1025

	end = offset + bytes;
1026
	spin_lock(&inode->i_lock);
L
Linus Torvalds 已提交
1027 1028

	for (;;) {
1029
		req = nfs_page_find_head_request_locked(NFS_I(inode), page);
1030 1031 1032
		if (req == NULL)
			goto out_unlock;

1033 1034 1035 1036
		/* should be handled by nfs_flush_incompatible */
		WARN_ON_ONCE(req->wb_head != req);
		WARN_ON_ONCE(req->wb_this_page != req);

1037 1038 1039 1040 1041 1042 1043
		rqend = req->wb_offset + req->wb_bytes;
		/*
		 * Tell the caller to flush out the request if
		 * the offsets are non-contiguous.
		 * Note: nfs_flush_incompatible() will already
		 * have flushed out requests having wrong owners.
		 */
1044
		if (offset > rqend
1045 1046 1047
		    || end < req->wb_offset)
			goto out_flushme;

1048
		if (nfs_lock_request(req))
L
Linus Torvalds 已提交
1049 1050
			break;

1051
		/* The request is locked, so wait and then retry */
1052
		spin_unlock(&inode->i_lock);
1053 1054 1055 1056 1057
		error = nfs_wait_on_request(req);
		nfs_release_request(req);
		if (error != 0)
			goto out_err;
		spin_lock(&inode->i_lock);
L
Linus Torvalds 已提交
1058 1059 1060 1061 1062 1063 1064 1065 1066
	}

	/* Okay, the request matches. Update the region */
	if (offset < req->wb_offset) {
		req->wb_offset = offset;
		req->wb_pgbase = offset;
	}
	if (end > rqend)
		req->wb_bytes = end - req->wb_offset;
1067 1068 1069
	else
		req->wb_bytes = rqend - req->wb_offset;
out_unlock:
1070 1071
	if (req)
		nfs_clear_request_commit(req);
1072
	spin_unlock(&inode->i_lock);
1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091
	return req;
out_flushme:
	spin_unlock(&inode->i_lock);
	nfs_release_request(req);
	error = nfs_wb_page(inode, page);
out_err:
	return ERR_PTR(error);
}

/*
 * Try to update an existing write request, or create one if there is none.
 *
 * Note: Should always be called with the Page Lock held to prevent races
 * if we have to add a new request. Also assumes that the caller has
 * already called nfs_flush_incompatible() if necessary.
 */
static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
		struct page *page, unsigned int offset, unsigned int bytes)
{
1092
	struct inode *inode = page_file_mapping(page)->host;
1093
	struct nfs_page	*req;
L
Linus Torvalds 已提交
1094

1095 1096 1097
	req = nfs_try_to_update_request(inode, page, offset, bytes);
	if (req != NULL)
		goto out;
1098
	req = nfs_create_request(ctx, page, NULL, offset, bytes);
1099 1100
	if (IS_ERR(req))
		goto out;
F
Fred Isaman 已提交
1101
	nfs_inode_add_request(inode, req);
1102
out:
T
Trond Myklebust 已提交
1103
	return req;
L
Linus Torvalds 已提交
1104 1105
}

1106 1107 1108 1109 1110 1111 1112 1113 1114 1115
/*
 * Obtain (or create) the write request covering @count bytes at @offset in
 * @page, then grow the file, mark the request up to date and dirty, and
 * drop the request lock and reference.
 *
 * Returns 0 on success or the error encoded in the request pointer.
 */
static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
		unsigned int offset, unsigned int count)
{
	struct nfs_page *wreq = nfs_setup_write_request(ctx, page, offset, count);

	if (IS_ERR(wreq))
		return PTR_ERR(wreq);

	/* Update file length */
	nfs_grow_file(page, offset, count);
	nfs_mark_uptodate(wreq);
	nfs_mark_request_dirty(wreq);
	nfs_unlock_and_release_request(wreq);
	return 0;
}

L
Linus Torvalds 已提交
1122 1123
int nfs_flush_incompatible(struct file *file, struct page *page)
{
1124
	struct nfs_open_context *ctx = nfs_file_open_context(file);
1125
	struct nfs_lock_context *l_ctx;
L
Linus Torvalds 已提交
1126
	struct nfs_page	*req;
T
Trond Myklebust 已提交
1127
	int do_flush, status;
L
Linus Torvalds 已提交
1128 1129 1130 1131 1132 1133 1134 1135
	/*
	 * Look for a request corresponding to this page. If there
	 * is one, and it belongs to another file, we flush it out
	 * before we try to copy anything into the page. Do this
	 * due to the lack of an ACCESS-type call in NFSv2.
	 * Also do the same if we find a request from an existing
	 * dropped page.
	 */
T
Trond Myklebust 已提交
1136
	do {
1137
		req = nfs_page_find_head_request(page);
T
Trond Myklebust 已提交
1138 1139
		if (req == NULL)
			return 0;
1140 1141
		l_ctx = req->wb_lock_context;
		do_flush = req->wb_page != page || req->wb_context != ctx;
1142 1143
		/* for now, flush if more than 1 request in page_group */
		do_flush |= req->wb_this_page != req;
1144
		if (l_ctx && ctx->dentry->d_inode->i_flock != NULL) {
1145 1146 1147
			do_flush |= l_ctx->lockowner.l_owner != current->files
				|| l_ctx->lockowner.l_pid != current->tgid;
		}
L
Linus Torvalds 已提交
1148
		nfs_release_request(req);
T
Trond Myklebust 已提交
1149 1150
		if (!do_flush)
			return 0;
1151
		status = nfs_wb_page(page_file_mapping(page)->host, page);
T
Trond Myklebust 已提交
1152 1153
	} while (status == 0);
	return status;
L
Linus Torvalds 已提交
1154 1155
}

1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182
/*
 * Avoid buffered writes when an open context credential's key would
 * expire soon.
 *
 * Returns -EACCES if the key will expire within RPC_KEY_EXPIRE_FAIL.
 *
 * Returns 0 and sets a credential flag, which triggers the inode to flush
 * and perform NFS_FILE_SYNC writes, if the key will expire within
 * RPC_KEY_EXPIRE_TIMEO.
 *
 * The decision itself is delegated to rpcauth_key_timeout_notify(), using
 * the server's RPC client auth and the open context's credential.
 */
int
nfs_key_timeout_notify(struct file *filp, struct inode *inode)
{
	struct rpc_auth *rpc_auth = NFS_SERVER(inode)->client->cl_auth;
	struct nfs_open_context *open_ctx = nfs_file_open_context(filp);

	return rpcauth_key_timeout_notify(rpc_auth, open_ctx->cred);
}

/*
 * Test if the open context credential key is marked to expire soon.
 * Thin wrapper around rpcauth_cred_key_to_expire() on ctx->cred.
 */
bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx)
{
	bool expiring = rpcauth_cred_key_to_expire(ctx->cred);

	return expiring;
}

1183 1184 1185 1186 1187
/*
 * If the page cache is marked as unsafe or invalid, then we can't rely on
 * the PageUptodate() flag. In this case, we will need to turn off
 * write optimisations that depend on the page contents being correct.
 */
1188
static bool nfs_write_pageuptodate(struct page *page, struct inode *inode)
1189
{
1190 1191
	struct nfs_inode *nfsi = NFS_I(inode);

1192 1193
	if (nfs_have_delegated_attributes(inode))
		goto out;
1194
	if (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
1195
		return false;
1196
	smp_rmb();
1197
	if (test_bit(NFS_INO_INVALIDATING, &nfsi->flags))
1198 1199
		return false;
out:
1200 1201
	if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
		return false;
1202
	return PageUptodate(page) != 0;
1203 1204
}

1205 1206 1207 1208 1209
/* If we know the page is up to date, and we're not using byte range locks (or
 * if we have the whole file locked for writing), it may be more efficient to
 * extend the write to cover the entire page in order to avoid fragmentation
 * inefficiencies.
 *
1210 1211
 * If the file is opened for synchronous writes then we can just skip the rest
 * of the checks.
1212 1213 1214 1215 1216
 */
static int nfs_can_extend_write(struct file *file, struct page *page, struct inode *inode)
{
	if (file->f_flags & O_DSYNC)
		return 0;
1217 1218
	if (!nfs_write_pageuptodate(page, inode))
		return 0;
1219 1220
	if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
		return 1;
1221
	if (inode->i_flock == NULL || (inode->i_flock->fl_start == 0 &&
1222
			inode->i_flock->fl_end == OFFSET_MAX &&
1223
			inode->i_flock->fl_type != F_RDLCK))
1224 1225 1226 1227
		return 1;
	return 0;
}

L
Linus Torvalds 已提交
1228 1229 1230 1231 1232 1233 1234 1235 1236
/*
 * Update and possibly write a cached page of an NFS file.
 *
 * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
 * things with a page scheduled for an RPC call (e.g. invalidate it).
 */
int nfs_updatepage(struct file *file, struct page *page,
		unsigned int offset, unsigned int count)
{
1237
	struct nfs_open_context *ctx = nfs_file_open_context(file);
1238
	struct inode	*inode = page_file_mapping(page)->host;
L
Linus Torvalds 已提交
1239 1240
	int		status = 0;

C
Chuck Lever 已提交
1241 1242
	nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);

1243 1244
	dprintk("NFS:       nfs_updatepage(%pD2 %d@%lld)\n",
		file, count, (long long)(page_file_offset(page) + offset));
L
Linus Torvalds 已提交
1245

1246
	if (nfs_can_extend_write(file, page, inode)) {
1247
		count = max(count + offset, nfs_page_length(page));
L
Linus Torvalds 已提交
1248 1249 1250
		offset = 0;
	}

1251
	status = nfs_writepage_setup(ctx, page, offset, count);
1252 1253
	if (status < 0)
		nfs_set_pageerror(page);
1254 1255
	else
		__set_page_dirty_nobuffers(page);
L
Linus Torvalds 已提交
1256

1257
	dprintk("NFS:       nfs_updatepage returns %d (isize %lld)\n",
L
Linus Torvalds 已提交
1258 1259 1260 1261
			status, (long long)i_size_read(inode));
	return status;
}

1262
static int flush_task_priority(int how)
L
Linus Torvalds 已提交
1263 1264 1265 1266 1267 1268 1269 1270 1271 1272
{
	switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) {
		case FLUSH_HIGHPRI:
			return RPC_PRIORITY_HIGH;
		case FLUSH_LOWPRI:
			return RPC_PRIORITY_LOW;
	}
	return RPC_PRIORITY_NORMAL;
}

1273 1274
static void nfs_initiate_write(struct nfs_pgio_header *hdr,
			       struct rpc_message *msg,
1275
			       struct rpc_task_setup *task_setup_data, int how)
L
Linus Torvalds 已提交
1276
{
1277
	struct inode *inode = hdr->inode;
1278
	int priority = flush_task_priority(how);
1279

1280
	task_setup_data->priority = priority;
1281
	NFS_PROTO(inode)->write_setup(hdr, msg);
1282

1283
	nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client,
1284
				 &task_setup_data->rpc_client, msg, hdr);
1285 1286
}

F
Fred 已提交
1287 1288 1289 1290 1291 1292 1293
/*
 * If a nfs_flush_* function fails, it should remove reqs from @head and
 * call this on each, which will prepare them to be retried on next
 * writeback using standard nfs.
 *
 * The request is re-marked dirty, unlocked, its page writeback is ended
 * and finally the caller's reference is dropped, in that order.
 */
static void nfs_redirty_request(struct nfs_page *req)
{
	nfs_mark_request_dirty(req);

	nfs_unlock_request(req);
	nfs_end_page_writeback(req);

	nfs_release_request(req);
}

1299
static void nfs_async_write_error(struct list_head *head)
1300 1301 1302 1303 1304 1305 1306 1307 1308 1309
{
	struct nfs_page	*req;

	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_redirty_request(req);
	}
}

1310 1311 1312 1313 1314
/* Completion callbacks used for asynchronous buffered writes. */
static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
	.completion	= nfs_write_completion,
	.error_cleanup	= nfs_async_write_error,
};

1315
/*
 * Initialise @pgio for writing to @inode.
 *
 * The generic read/write page-I/O ops are used unless (with NFSv4.1 built
 * in) the server has a pNFS layout driver and @force_mds is false, in which
 * case the driver's pg_write_ops are selected.  The block size is the
 * server's wsize.
 */
void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
			       struct inode *inode, int ioflags, bool force_mds,
			       const struct nfs_pgio_completion_ops *compl_ops)
{
	struct nfs_server *srv = NFS_SERVER(inode);
	const struct nfs_pageio_ops *ops = &nfs_pgio_rw_ops;

#ifdef CONFIG_NFS_V4_1
	if (!force_mds && srv->pnfs_curr_ld)
		ops = srv->pnfs_curr_ld->pg_write_ops;
#endif
	nfs_pageio_init(pgio, inode, ops, compl_ops, &nfs_rw_write_ops,
			srv->wsize, ioflags);
}
EXPORT_SYMBOL_GPL(nfs_pageio_init_write);
L
Linus Torvalds 已提交
1330

1331 1332
void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
{
1333
	pgio->pg_ops = &nfs_pgio_rw_ops;
1334 1335
	pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize;
}
1336
EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);
1337

L
Linus Torvalds 已提交
1338

1339 1340 1341 1342 1343 1344 1345
void nfs_commit_prepare(struct rpc_task *task, void *calldata)
{
	struct nfs_commit_data *data = calldata;

	NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
}

1346
/* Release hook for write headers; intentionally empty. */
static void nfs_writeback_release_common(struct nfs_pgio_header *hdr)
{
	/* do nothing! */
}

1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361
/*
 * Special version of should_remove_suid() that ignores capabilities.
 */
static int nfs_should_remove_suid(const struct inode *inode)
{
	umode_t mode = inode->i_mode;
	int kill = 0;

	/* suid always must be killed */
	if (unlikely(mode & S_ISUID))
		kill = ATTR_KILL_SUID;
1362

1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374
	/*
	 * sgid without any exec bits is just a mandatory locking mark; leave
	 * it alone.  If some exec bits are set, it's a real sgid; kill it.
	 */
	if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
		kill |= ATTR_KILL_SGID;

	if (unlikely(kill && S_ISREG(mode)))
		return kill;

	return 0;
}
1375

L
Linus Torvalds 已提交
1376 1377 1378
/*
 * This function is called when the WRITE call is complete.
 */
1379 1380
static int nfs_writeback_done(struct rpc_task *task,
			      struct nfs_pgio_header *hdr,
1381
			      struct inode *inode)
L
Linus Torvalds 已提交
1382
{
1383
	int status;
L
Linus Torvalds 已提交
1384

1385 1386 1387 1388 1389 1390 1391
	/*
	 * ->write_done will attempt to use post-op attributes to detect
	 * conflicting writes by other clients.  A strict interpretation
	 * of close-to-open would allow us to continue caching even if
	 * another writer had changed the file, but some applications
	 * depend on tighter cache coherency when writing.
	 */
1392
	status = NFS_PROTO(inode)->write_done(task, hdr);
1393
	if (status != 0)
1394
		return status;
1395
	nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count);
C
Chuck Lever 已提交
1396

B
Bryan Schumaker 已提交
1397
#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
1398 1399
	if (hdr->res.verf->committed < hdr->args.stable &&
	    task->tk_status >= 0) {
L
Linus Torvalds 已提交
1400 1401 1402 1403 1404 1405 1406 1407 1408 1409
		/* We tried a write call, but the server did not
		 * commit data to stable storage even though we
		 * requested it.
		 * Note: There is a known bug in Tru64 < 5.0 in which
		 *	 the server reports NFS_DATA_SYNC, but performs
		 *	 NFS_FILE_SYNC. We therefore implement this checking
		 *	 as a dprintk() in order to avoid filling syslog.
		 */
		static unsigned long    complain;

1410
		/* Note this will print the MDS for a DS write */
L
Linus Torvalds 已提交
1411
		if (time_before(complain, jiffies)) {
1412
			dprintk("NFS:       faulty NFS server %s:"
L
Linus Torvalds 已提交
1413
				" (committed = %d) != (stable = %d)\n",
1414
				NFS_SERVER(inode)->nfs_client->cl_hostname,
1415
				hdr->res.verf->committed, hdr->args.stable);
L
Linus Torvalds 已提交
1416 1417 1418 1419
			complain = jiffies + 300 * HZ;
		}
	}
#endif
1420 1421 1422 1423

	/* Deal with the suid/sgid bit corner case */
	if (nfs_should_remove_suid(inode))
		nfs_mark_for_revalidate(inode);
1424 1425 1426 1427 1428 1429
	return 0;
}

/*
 * This function is called when the WRITE call is complete.
 */
1430 1431
static void nfs_writeback_result(struct rpc_task *task,
				 struct nfs_pgio_header *hdr)
1432
{
1433 1434
	struct nfs_pgio_args	*argp = &hdr->args;
	struct nfs_pgio_res	*resp = &hdr->res;
1435 1436

	if (resp->count < argp->count) {
L
Linus Torvalds 已提交
1437 1438
		static unsigned long    complain;

1439
		/* This a short write! */
1440
		nfs_inc_stats(hdr->inode, NFSIOS_SHORTWRITE);
C
Chuck Lever 已提交
1441

L
Linus Torvalds 已提交
1442
		/* Has the server at least made some progress? */
1443 1444 1445 1446 1447 1448
		if (resp->count == 0) {
			if (time_before(complain, jiffies)) {
				printk(KERN_WARNING
				       "NFS: Server wrote zero bytes, expected %u.\n",
				       argp->count);
				complain = jiffies + 300 * HZ;
L
Linus Torvalds 已提交
1449
			}
1450
			nfs_set_pgio_error(hdr, -EIO, argp->offset);
1451
			task->tk_status = -EIO;
1452
			return;
L
Linus Torvalds 已提交
1453
		}
1454 1455 1456
		/* Was this an NFSv2 write or an NFSv3 stable write? */
		if (resp->verf->committed != NFS_UNSTABLE) {
			/* Resend from where the server left off */
1457
			hdr->mds_offset += resp->count;
1458 1459 1460 1461 1462 1463 1464 1465
			argp->offset += resp->count;
			argp->pgbase += resp->count;
			argp->count -= resp->count;
		} else {
			/* Resend as a stable write in order to avoid
			 * headaches in the case of a server crash.
			 */
			argp->stable = NFS_FILE_SYNC;
L
Linus Torvalds 已提交
1466
		}
1467
		rpc_restart_call_prepare(task);
L
Linus Torvalds 已提交
1468 1469 1470 1471
	}
}


B
Bryan Schumaker 已提交
1472
#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
1473 1474
/*
 * Try to take the per-inode NFS_INO_COMMIT bit lock.
 *
 * Returns 1 when the bit was acquired, 0 when it is busy and @may_wait is
 * zero, or the negative result of the killable wait.
 */
static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
{
	int err;

	if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags))
		return 1;
	if (!may_wait)
		return 0;

	err = out_of_line_wait_on_bit_lock(&nfsi->flags,
				NFS_INO_COMMIT,
				nfs_wait_bit_killable,
				TASK_KILLABLE);
	if (err < 0)
		return err;
	return 1;
}

1488
static void nfs_commit_clear_lock(struct nfs_inode *nfsi)
1489 1490
{
	clear_bit(NFS_INO_COMMIT, &nfsi->flags);
1491
	smp_mb__after_atomic();
1492 1493 1494
	wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
}

1495
void nfs_commitdata_release(struct nfs_commit_data *data)
L
Linus Torvalds 已提交
1496
{
1497 1498
	put_nfs_open_context(data->context);
	nfs_commit_free(data);
L
Linus Torvalds 已提交
1499
}
1500
EXPORT_SYMBOL_GPL(nfs_commitdata_release);
L
Linus Torvalds 已提交
1501

1502
int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data,
1503
			const struct rpc_call_ops *call_ops,
1504
			int how, int flags)
L
Linus Torvalds 已提交
1505
{
1506
	struct rpc_task *task;
1507
	int priority = flush_task_priority(how);
1508 1509 1510
	struct rpc_message msg = {
		.rpc_argp = &data->args,
		.rpc_resp = &data->res,
1511
		.rpc_cred = data->cred,
1512
	};
1513
	struct rpc_task_setup task_setup_data = {
1514
		.task = &data->task,
1515
		.rpc_client = clnt,
1516
		.rpc_message = &msg,
1517
		.callback_ops = call_ops,
1518
		.callback_data = data,
1519
		.workqueue = nfsiod_workqueue,
1520
		.flags = RPC_TASK_ASYNC | flags,
1521
		.priority = priority,
1522
	};
1523 1524 1525 1526 1527
	/* Set up the initial task struct.  */
	NFS_PROTO(data->inode)->commit_setup(data, &msg);

	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);

1528 1529 1530
	nfs4_state_protect(NFS_SERVER(data->inode)->nfs_client,
		NFS_SP4_MACH_CRED_COMMIT, &task_setup_data.rpc_client, &msg);

1531 1532 1533 1534 1535 1536 1537 1538
	task = rpc_run_task(&task_setup_data);
	if (IS_ERR(task))
		return PTR_ERR(task);
	if (how & FLUSH_SYNC)
		rpc_wait_for_completion_task(task);
	rpc_put_task(task);
	return 0;
}
1539
EXPORT_SYMBOL_GPL(nfs_initiate_commit);
1540

1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552
static loff_t nfs_get_lwb(struct list_head *head)
{
	loff_t lwb = 0;
	struct nfs_page *req;

	list_for_each_entry(req, head, wb_list)
		if (lwb < (req_offset(req) + req->wb_bytes))
			lwb = req_offset(req) + req->wb_bytes;

	return lwb;
}

1553 1554 1555
/*
 * Set up the argument/result storage required for the RPC call.
 */
1556
void nfs_init_commit(struct nfs_commit_data *data,
1557 1558 1559
		     struct list_head *head,
		     struct pnfs_layout_segment *lseg,
		     struct nfs_commit_info *cinfo)
1560 1561
{
	struct nfs_page *first = nfs_list_entry(head->next);
1562
	struct inode *inode = first->wb_context->dentry->d_inode;
L
Linus Torvalds 已提交
1563 1564 1565 1566 1567 1568 1569

	/* Set up the RPC argument and reply structs
	 * NB: take care not to mess about with data->commit et al. */

	list_splice_init(head, &data->pages);

	data->inode	  = inode;
1570
	data->cred	  = first->wb_context->cred;
1571
	data->lseg	  = lseg; /* reference transferred */
1572 1573 1574
	/* only set lwb for pnfs commit */
	if (lseg)
		data->lwb = nfs_get_lwb(&data->pages);
1575
	data->mds_ops     = &nfs_commit_ops;
1576
	data->completion_ops = cinfo->completion_ops;
F
Fred Isaman 已提交
1577
	data->dreq	  = cinfo->dreq;
L
Linus Torvalds 已提交
1578 1579

	data->args.fh     = NFS_FH(data->inode);
1580 1581 1582
	/* Note: we always request a commit of the entire inode */
	data->args.offset = 0;
	data->args.count  = 0;
1583
	data->context     = get_nfs_open_context(first->wb_context);
L
Linus Torvalds 已提交
1584 1585
	data->res.fattr   = &data->fattr;
	data->res.verf    = &data->verf;
1586
	nfs_fattr_init(&data->fattr);
L
Linus Torvalds 已提交
1587
}
1588
EXPORT_SYMBOL_GPL(nfs_init_commit);
L
Linus Torvalds 已提交
1589

1590
void nfs_retry_commit(struct list_head *page_list,
F
Fred Isaman 已提交
1591 1592
		      struct pnfs_layout_segment *lseg,
		      struct nfs_commit_info *cinfo)
1593 1594 1595 1596 1597 1598
{
	struct nfs_page *req;

	while (!list_empty(page_list)) {
		req = nfs_list_entry(page_list->next);
		nfs_list_remove_request(req);
F
Fred Isaman 已提交
1599
		nfs_mark_request_commit(req, lseg, cinfo);
1600 1601
		if (!cinfo->dreq) {
			dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
1602
			dec_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info,
1603 1604
				     BDI_RECLAIMABLE);
		}
1605
		nfs_unlock_and_release_request(req);
1606 1607
	}
}
1608
EXPORT_SYMBOL_GPL(nfs_retry_commit);
1609

L
Linus Torvalds 已提交
1610 1611 1612 1613
/*
 * Commit dirty pages
 */
static int
F
Fred Isaman 已提交
1614 1615
nfs_commit_list(struct inode *inode, struct list_head *head, int how,
		struct nfs_commit_info *cinfo)
L
Linus Torvalds 已提交
1616
{
1617
	struct nfs_commit_data	*data;
L
Linus Torvalds 已提交
1618

1619
	data = nfs_commitdata_alloc();
L
Linus Torvalds 已提交
1620 1621 1622 1623 1624

	if (!data)
		goto out_bad;

	/* Set up the argument struct */
1625 1626
	nfs_init_commit(data, head, NULL, cinfo);
	atomic_inc(&cinfo->mds->rpcs_out);
1627 1628
	return nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops,
				   how, 0);
L
Linus Torvalds 已提交
1629
 out_bad:
F
Fred Isaman 已提交
1630
	nfs_retry_commit(head, NULL, cinfo);
1631
	cinfo->completion_ops->error_cleanup(NFS_I(inode));
L
Linus Torvalds 已提交
1632 1633 1634 1635 1636 1637
	return -ENOMEM;
}

/*
 * COMMIT call returned
 */
1638
static void nfs_commit_done(struct rpc_task *task, void *calldata)
L
Linus Torvalds 已提交
1639
{
1640
	struct nfs_commit_data	*data = calldata;
L
Linus Torvalds 已提交
1641

C
Chuck Lever 已提交
1642
        dprintk("NFS: %5u nfs_commit_done (status %d)\n",
L
Linus Torvalds 已提交
1643 1644
                                task->tk_pid, task->tk_status);

1645
	/* Call the NFS version-specific code */
1646
	NFS_PROTO(data->inode)->commit_done(task, data);
1647 1648
}

1649
static void nfs_commit_release_pages(struct nfs_commit_data *data)
1650
{
1651
	struct nfs_page	*req;
1652
	int status = data->task.tk_status;
1653
	struct nfs_commit_info cinfo;
1654

L
Linus Torvalds 已提交
1655 1656 1657
	while (!list_empty(&data->pages)) {
		req = nfs_list_entry(data->pages.next);
		nfs_list_remove_request(req);
F
Fred Isaman 已提交
1658
		nfs_clear_page_commit(req->wb_page);
L
Linus Torvalds 已提交
1659

1660
		dprintk("NFS:       commit (%s/%llu %d@%lld)",
1661
			req->wb_context->dentry->d_sb->s_id,
1662
			(unsigned long long)NFS_FILEID(req->wb_context->dentry->d_inode),
L
Linus Torvalds 已提交
1663 1664
			req->wb_bytes,
			(long long)req_offset(req));
1665 1666
		if (status < 0) {
			nfs_context_set_write_error(req->wb_context, status);
L
Linus Torvalds 已提交
1667
			nfs_inode_remove_request(req);
1668
			dprintk(", error = %d\n", status);
L
Linus Torvalds 已提交
1669 1670 1671 1672 1673
			goto next;
		}

		/* Okay, COMMIT succeeded, apparently. Check the verifier
		 * returned by the server against all stored verfs. */
1674
		if (!memcmp(&req->wb_verf, &data->verf.verifier, sizeof(req->wb_verf))) {
L
Linus Torvalds 已提交
1675 1676 1677 1678 1679 1680 1681
			/* We have a match */
			nfs_inode_remove_request(req);
			dprintk(" OK\n");
			goto next;
		}
		/* We have a mismatch. Write the page again */
		dprintk(" mismatch\n");
F
Fred 已提交
1682
		nfs_mark_request_dirty(req);
1683
		set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags);
L
Linus Torvalds 已提交
1684
	next:
1685
		nfs_unlock_and_release_request(req);
L
Linus Torvalds 已提交
1686
	}
1687 1688 1689
	nfs_init_cinfo(&cinfo, data->inode, data->dreq);
	if (atomic_dec_and_test(&cinfo.mds->rpcs_out))
		nfs_commit_clear_lock(NFS_I(data->inode));
1690 1691 1692 1693
}

static void nfs_commit_release(void *calldata)
{
1694
	struct nfs_commit_data *data = calldata;
1695

1696
	data->completion_ops->completion(data);
1697
	nfs_commitdata_release(calldata);
L
Linus Torvalds 已提交
1698
}
1699 1700

static const struct rpc_call_ops nfs_commit_ops = {
1701
	.rpc_call_prepare = nfs_commit_prepare,
1702 1703 1704
	.rpc_call_done = nfs_commit_done,
	.rpc_release = nfs_commit_release,
};
L
Linus Torvalds 已提交
1705

1706 1707 1708 1709 1710
/* Default commit completion hooks: page post-processing and lock release. */
static const struct nfs_commit_completion_ops nfs_commit_completion_ops = {
	.error_cleanup	= nfs_commit_clear_lock,
	.completion	= nfs_commit_release_pages,
};

1711 1712
int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
			    int how, struct nfs_commit_info *cinfo)
1713 1714 1715
{
	int status;

F
Fred Isaman 已提交
1716
	status = pnfs_commit_list(inode, head, how, cinfo);
1717
	if (status == PNFS_NOT_ATTEMPTED)
F
Fred Isaman 已提交
1718
		status = nfs_commit_list(inode, head, how, cinfo);
1719 1720 1721
	return status;
}

1722
int nfs_commit_inode(struct inode *inode, int how)
L
Linus Torvalds 已提交
1723 1724
{
	LIST_HEAD(head);
F
Fred Isaman 已提交
1725
	struct nfs_commit_info cinfo;
1726
	int may_wait = how & FLUSH_SYNC;
1727
	int res;
L
Linus Torvalds 已提交
1728

1729 1730
	res = nfs_commit_set_lock(NFS_I(inode), may_wait);
	if (res <= 0)
1731
		goto out_mark_dirty;
F
Fred Isaman 已提交
1732 1733
	nfs_init_cinfo_from_inode(&cinfo, inode);
	res = nfs_scan_commit(inode, &head, &cinfo);
L
Linus Torvalds 已提交
1734
	if (res) {
1735 1736
		int error;

F
Fred Isaman 已提交
1737
		error = nfs_generic_commit_list(inode, &head, how, &cinfo);
1738 1739
		if (error < 0)
			return error;
1740
		if (!may_wait)
1741
			goto out_mark_dirty;
1742
		error = wait_on_bit_action(&NFS_I(inode)->flags,
1743 1744 1745 1746 1747
				NFS_INO_COMMIT,
				nfs_wait_bit_killable,
				TASK_KILLABLE);
		if (error < 0)
			return error;
1748 1749
	} else
		nfs_commit_clear_lock(NFS_I(inode));
1750 1751 1752 1753 1754 1755 1756 1757
	return res;
	/* Note: If we exit without ensuring that the commit is complete,
	 * we must mark the inode as dirty. Otherwise, future calls to
	 * sync_inode() with the WB_SYNC_ALL flag set will fail to ensure
	 * that the data is on the disk.
	 */
out_mark_dirty:
	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
L
Linus Torvalds 已提交
1758 1759
	return res;
}
1760 1761 1762

static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
{
1763 1764 1765
	struct nfs_inode *nfsi = NFS_I(inode);
	int flags = FLUSH_SYNC;
	int ret = 0;
1766

1767
	/* no commits means nothing needs to be done */
F
Fred Isaman 已提交
1768
	if (!nfsi->commit_info.ncommit)
1769 1770
		return ret;

1771 1772 1773 1774
	if (wbc->sync_mode == WB_SYNC_NONE) {
		/* Don't commit yet if this is a non-blocking flush and there
		 * are a lot of outstanding writes for this mapping.
		 */
F
Fred Isaman 已提交
1775
		if (nfsi->commit_info.ncommit <= (nfsi->npages >> 1))
1776
			goto out_mark_dirty;
1777

1778
		/* don't wait for the COMMIT response */
1779
		flags = 0;
1780 1781
	}

1782 1783 1784 1785 1786 1787 1788 1789
	ret = nfs_commit_inode(inode, flags);
	if (ret >= 0) {
		if (wbc->sync_mode == WB_SYNC_NONE) {
			if (ret < wbc->nr_to_write)
				wbc->nr_to_write -= ret;
			else
				wbc->nr_to_write = 0;
		}
1790
		return 0;
1791 1792
	}
out_mark_dirty:
1793 1794 1795
	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
	return ret;
}
1796
#else
1797 1798 1799 1800
/*
 * Stub used when neither CONFIG_NFS_V3 nor CONFIG_NFS_V4 is enabled:
 * there are no unstable writes to commit, so always succeed.
 */
static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
{
	const int nothing_to_do = 0;

	return nothing_to_do;
}
L
Linus Torvalds 已提交
1801 1802
#endif

1803 1804
/* Inode writeback entry point: delegates to nfs_commit_unstable_pages(). */
int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
{
	int ret = nfs_commit_unstable_pages(inode, wbc);

	return ret;
}
EXPORT_SYMBOL_GPL(nfs_write_inode);
1808

1809 1810 1811 1812
/*
 * flush the inode to disk.
 */
int nfs_wb_all(struct inode *inode)
T
Trond Myklebust 已提交
1813 1814
{
	struct writeback_control wbc = {
1815
		.sync_mode = WB_SYNC_ALL,
T
Trond Myklebust 已提交
1816
		.nr_to_write = LONG_MAX,
1817 1818
		.range_start = 0,
		.range_end = LLONG_MAX,
T
Trond Myklebust 已提交
1819
	};
1820 1821 1822 1823 1824
	int ret;

	trace_nfs_writeback_inode_enter(inode);

	ret = sync_inode(inode, &wbc);
T
Trond Myklebust 已提交
1825

1826 1827
	trace_nfs_writeback_inode_exit(inode, ret);
	return ret;
1828
}
B
Bryan Schumaker 已提交
1829
EXPORT_SYMBOL_GPL(nfs_wb_all);
1830

1831 1832 1833 1834 1835
/*
 * Cancel any pending write requests on @page.
 *
 * Waits for page writeback to finish, then locks and joins the page's
 * requests.  Returns the error from that step, or 0 (also when the page
 * had no request at all).
 */
int nfs_wb_page_cancel(struct inode *inode, struct page *page)
{
	struct nfs_page *head;

	wait_on_page_writeback(page);

	/* blocking call to cancel all requests and join to a single (head)
	 * request */
	head = nfs_lock_and_join_requests(page, false);
	if (IS_ERR(head))
		return PTR_ERR(head);
	if (!head)
		return 0;

	/* all requests from this page have been cancelled by
	 * nfs_lock_and_join_requests, so just remove the head
	 * request from the inode / page_private pointer and
	 * release it */
	nfs_inode_remove_request(head);
	/*
	 * In case nfs_inode_remove_request has marked the
	 * page as being dirty
	 */
	cancel_dirty_page(page, PAGE_CACHE_SIZE);
	nfs_unlock_and_release_request(head);
	return 0;
}

T
Trond Myklebust 已提交
1861 1862 1863 1864
/*
 * Write back all requests on one page - we do this before reading it.
 */
int nfs_wb_page(struct inode *inode, struct page *page)
1865
{
1866
	loff_t range_start = page_file_offset(page);
1867
	loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
1868 1869
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
T
Trond Myklebust 已提交
1870
		.nr_to_write = 0,
1871 1872 1873 1874
		.range_start = range_start,
		.range_end = range_end,
	};
	int ret;
1875

1876 1877
	trace_nfs_writeback_page_enter(inode);

1878
	for (;;) {
1879
		wait_on_page_writeback(page);
1880 1881 1882 1883
		if (clear_page_dirty_for_io(page)) {
			ret = nfs_writepage_locked(page, &wbc);
			if (ret < 0)
				goto out_error;
1884
			continue;
T
Trond Myklebust 已提交
1885
		}
1886
		ret = 0;
1887 1888 1889
		if (!PagePrivate(page))
			break;
		ret = nfs_commit_inode(inode, FLUSH_SYNC);
1890
		if (ret < 0)
1891
			goto out_error;
T
Trond Myklebust 已提交
1892
	}
1893
out_error:
1894
	trace_nfs_writeback_page_exit(inode, ret);
1895
	return ret;
1896 1897
}

1898 1899
#ifdef CONFIG_MIGRATION
int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
1900
		struct page *page, enum migrate_mode mode)
1901
{
1902 1903 1904 1905 1906 1907 1908 1909 1910 1911
	/*
	 * If PagePrivate is set, then the page is currently associated with
	 * an in-progress read or write request. Don't try to migrate it.
	 *
	 * FIXME: we could do this in principle, but we'll need a way to ensure
	 *        that we can safely release the inode reference while holding
	 *        the page lock.
	 */
	if (PagePrivate(page))
		return -EBUSY;
1912

1913 1914
	if (!nfs_fscache_release_page(page, GFP_KERNEL))
		return -EBUSY;
1915

1916
	return migrate_page(mapping, newpage, page, mode);
1917 1918 1919
}
#endif

D
David Howells 已提交
1920
/*
 * Set up the allocation pools used by the NFS write and commit paths and
 * pick the default value of nfs_congestion_kb.
 *
 * Creates, in order: the "nfs_write_data" slab cache, a mempool of
 * MIN_POOL_WRITE objects backed by it, the "nfs_commit_data" slab cache,
 * and a mempool of MIN_POOL_COMMIT objects backed by that.  If any step
 * fails, everything created so far is torn down in reverse order.
 *
 * Returns 0 on success or -ENOMEM if any cache or mempool cannot be created.
 */
int __init nfs_init_writepagecache(void)
{
	nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
					     sizeof(struct nfs_pgio_header),
					     0, SLAB_HWCACHE_ALIGN,
					     NULL);
	if (nfs_wdata_cachep == NULL)
		return -ENOMEM;

	nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE,
						     nfs_wdata_cachep);
	if (nfs_wdata_mempool == NULL)
		goto out_destroy_write_cache;

	nfs_cdata_cachep = kmem_cache_create("nfs_commit_data",
					     sizeof(struct nfs_commit_data),
					     0, SLAB_HWCACHE_ALIGN,
					     NULL);
	if (nfs_cdata_cachep == NULL)
		goto out_destroy_write_mempool;

	nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT,
						      nfs_cdata_cachep);
	if (nfs_commit_mempool == NULL)
		goto out_destroy_commit_cache;

	/*
	 * NFS congestion size, scale with available memory.
	 *
	 *  64MB:    8192k
	 * 128MB:   11585k
	 * 256MB:   16384k
	 * 512MB:   23170k
	 *   1GB:   32768k
	 *   2GB:   46340k
	 *   4GB:   65536k
	 *   8GB:   92681k
	 *  16GB:  131072k
	 *
	 * This allows larger machines to have larger/more transfers.
	 * Limit the default to 256M
	 */
	nfs_congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
	if (nfs_congestion_kb > 256*1024)
		nfs_congestion_kb = 256*1024;

	return 0;

	/* Error unwind: each label releases what was created before it. */
out_destroy_commit_cache:
	kmem_cache_destroy(nfs_cdata_cachep);
out_destroy_write_mempool:
	mempool_destroy(nfs_wdata_mempool);
out_destroy_write_cache:
	kmem_cache_destroy(nfs_wdata_cachep);
	return -ENOMEM;
}

void nfs_destroy_writepagecache(void)
L
Linus Torvalds 已提交
1978 1979
{
	mempool_destroy(nfs_commit_mempool);
1980
	kmem_cache_destroy(nfs_cdata_cachep);
L
Linus Torvalds 已提交
1981
	mempool_destroy(nfs_wdata_mempool);
1982
	kmem_cache_destroy(nfs_wdata_cachep);
L
Linus Torvalds 已提交
1983 1984
}

static const struct nfs_rw_ops nfs_rw_write_ops = {
1986
	.rw_mode		= FMODE_WRITE,
1987 1988
	.rw_alloc_header	= nfs_writehdr_alloc,
	.rw_free_header		= nfs_writehdr_free,
1989
	.rw_release		= nfs_writeback_release_common,
1990 1991
	.rw_done		= nfs_writeback_done,
	.rw_result		= nfs_writeback_result,
1992
	.rw_initiate		= nfs_initiate_write,
1993
};