read_helper.c 34.8 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
// SPDX-License-Identifier: GPL-2.0-or-later
/* Network filesystem high-level read support.
 *
 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/module.h>
#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/uio.h>
#include <linux/sched/mm.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/netfs.h>
#include "internal.h"
D
David Howells 已提交
19 20
#define CREATE_TRACE_POINTS
#include <trace/events/netfs.h>
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57

MODULE_DESCRIPTION("Network fs support");
MODULE_AUTHOR("Red Hat, Inc.");
MODULE_LICENSE("GPL");

unsigned netfs_debug;
module_param_named(debug, netfs_debug, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(netfs_debug, "Netfs support debugging mask");

static void netfs_rreq_work(struct work_struct *);
static void __netfs_put_subrequest(struct netfs_read_subrequest *, bool);

static void netfs_put_subrequest(struct netfs_read_subrequest *subreq,
				 bool was_async)
{
	if (refcount_dec_and_test(&subreq->usage))
		__netfs_put_subrequest(subreq, was_async);
}

static struct netfs_read_request *netfs_alloc_read_request(
	const struct netfs_read_request_ops *ops, void *netfs_priv,
	struct file *file)
{
	static atomic_t debug_ids;
	struct netfs_read_request *rreq;

	rreq = kzalloc(sizeof(struct netfs_read_request), GFP_KERNEL);
	if (rreq) {
		rreq->netfs_ops	= ops;
		rreq->netfs_priv = netfs_priv;
		rreq->inode	= file_inode(file);
		rreq->i_size	= i_size_read(rreq->inode);
		rreq->debug_id	= atomic_inc_return(&debug_ids);
		INIT_LIST_HEAD(&rreq->subrequests);
		INIT_WORK(&rreq->work, netfs_rreq_work);
		refcount_set(&rreq->usage, 1);
		__set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
58 59
		if (ops->init_rreq)
			ops->init_rreq(rreq, file);
D
David Howells 已提交
60
		netfs_stat(&netfs_n_rh_rreq);
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
	}

	return rreq;
}

static void netfs_get_read_request(struct netfs_read_request *rreq)
{
	refcount_inc(&rreq->usage);
}

static void netfs_rreq_clear_subreqs(struct netfs_read_request *rreq,
				     bool was_async)
{
	struct netfs_read_subrequest *subreq;

	while (!list_empty(&rreq->subrequests)) {
		subreq = list_first_entry(&rreq->subrequests,
					  struct netfs_read_subrequest, rreq_link);
		list_del(&subreq->rreq_link);
		netfs_put_subrequest(subreq, was_async);
	}
}

static void netfs_free_read_request(struct work_struct *work)
{
	struct netfs_read_request *rreq =
		container_of(work, struct netfs_read_request, work);
	netfs_rreq_clear_subreqs(rreq, false);
	if (rreq->netfs_priv)
		rreq->netfs_ops->cleanup(rreq->mapping, rreq->netfs_priv);
D
David Howells 已提交
91
	trace_netfs_rreq(rreq, netfs_rreq_trace_free);
92 93
	if (rreq->cache_resources.ops)
		rreq->cache_resources.ops->end_operation(&rreq->cache_resources);
94
	kfree(rreq);
D
David Howells 已提交
95
	netfs_stat_d(&netfs_n_rh_rreq);
96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124
}

static void netfs_put_read_request(struct netfs_read_request *rreq, bool was_async)
{
	if (refcount_dec_and_test(&rreq->usage)) {
		if (was_async) {
			rreq->work.func = netfs_free_read_request;
			if (!queue_work(system_unbound_wq, &rreq->work))
				BUG();
		} else {
			netfs_free_read_request(&rreq->work);
		}
	}
}

/*
 * Allocate and partially initialise an I/O request structure.
 */
static struct netfs_read_subrequest *netfs_alloc_subrequest(
	struct netfs_read_request *rreq)
{
	struct netfs_read_subrequest *subreq;

	subreq = kzalloc(sizeof(struct netfs_read_subrequest), GFP_KERNEL);
	if (subreq) {
		INIT_LIST_HEAD(&subreq->rreq_link);
		refcount_set(&subreq->usage, 2);
		subreq->rreq = rreq;
		netfs_get_read_request(rreq);
D
David Howells 已提交
125
		netfs_stat(&netfs_n_rh_sreq);
126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
	}

	return subreq;
}

static void netfs_get_read_subrequest(struct netfs_read_subrequest *subreq)
{
	refcount_inc(&subreq->usage);
}

static void __netfs_put_subrequest(struct netfs_read_subrequest *subreq,
				   bool was_async)
{
	struct netfs_read_request *rreq = subreq->rreq;

D
David Howells 已提交
141
	trace_netfs_sreq(subreq, netfs_sreq_trace_free);
142
	kfree(subreq);
D
David Howells 已提交
143
	netfs_stat_d(&netfs_n_rh_sreq);
144 145 146 147 148 149 150 151 152 153
	netfs_put_read_request(rreq, was_async);
}

/*
 * Clear the unread part of an I/O request.
 */
static void netfs_clear_unread(struct netfs_read_subrequest *subreq)
{
	struct iov_iter iter;

154
	iov_iter_xarray(&iter, READ, &subreq->rreq->mapping->i_pages,
155 156 157 158 159
			subreq->start + subreq->transferred,
			subreq->len   - subreq->transferred);
	iov_iter_zero(iov_iter_count(&iter), &iter);
}

160 161 162 163 164 165 166 167 168 169 170 171 172 173
static void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error,
					bool was_async)
{
	struct netfs_read_subrequest *subreq = priv;

	netfs_subreq_terminated(subreq, transferred_or_error, was_async);
}

/*
 * Issue a read against the cache.
 * - Eats the caller's ref on subreq.
 */
static void netfs_read_from_cache(struct netfs_read_request *rreq,
				  struct netfs_read_subrequest *subreq,
174
				  enum netfs_read_from_hole read_hole)
175 176 177 178 179 180 181 182 183
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;
	struct iov_iter iter;

	netfs_stat(&netfs_n_rh_read);
	iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages,
			subreq->start + subreq->transferred,
			subreq->len   - subreq->transferred);

184
	cres->ops->read(cres, subreq->start, &iter, read_hole,
185 186 187
			netfs_cache_read_terminated, subreq);
}

188 189 190 191 192 193
/*
 * Fill a subrequest region with zeroes.
 */
static void netfs_fill_with_zeroes(struct netfs_read_request *rreq,
				   struct netfs_read_subrequest *subreq)
{
D
David Howells 已提交
194
	netfs_stat(&netfs_n_rh_zero);
195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217
	__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
	netfs_subreq_terminated(subreq, 0, false);
}

/*
 * Ask the netfs to issue a read request to the server for us.
 *
 * The netfs is expected to read from subreq->pos + subreq->transferred to
 * subreq->pos + subreq->len - 1.  It may not backtrack and write data into the
 * buffer prior to the transferred point as it might clobber dirty data
 * obtained from the cache.
 *
 * Alternatively, the netfs is allowed to indicate one of two things:
 *
 * - NETFS_SREQ_SHORT_READ: A short read - it will get called again to try and
 *   make progress.
 *
 * - NETFS_SREQ_CLEAR_TAIL: A short read - the rest of the buffer will be
 *   cleared.
 */
static void netfs_read_from_server(struct netfs_read_request *rreq,
				   struct netfs_read_subrequest *subreq)
{
D
David Howells 已提交
218
	netfs_stat(&netfs_n_rh_download);
219 220 221 222 223 224 225 226
	rreq->netfs_ops->issue_op(subreq);
}

/*
 * Release those waiting.
 */
static void netfs_rreq_completed(struct netfs_read_request *rreq, bool was_async)
{
D
David Howells 已提交
227
	trace_netfs_rreq(rreq, netfs_rreq_trace_done);
228 229 230 231
	netfs_rreq_clear_subreqs(rreq, was_async);
	netfs_put_read_request(rreq, was_async);
}

232 233
/*
 * Deal with the completion of writing the data to the cache.  We have to clear
D
David Howells 已提交
234
 * the PG_fscache bits on the folios involved and release the caller's ref.
235 236 237 238 239 240 241
 *
 * May be called in softirq mode and we inherit a ref from the caller.
 */
static void netfs_rreq_unmark_after_write(struct netfs_read_request *rreq,
					  bool was_async)
{
	struct netfs_read_subrequest *subreq;
D
David Howells 已提交
242
	struct folio *folio;
243 244 245 246 247 248 249 250
	pgoff_t unlocked = 0;
	bool have_unlocked = false;

	rcu_read_lock();

	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
		XA_STATE(xas, &rreq->mapping->i_pages, subreq->start / PAGE_SIZE);

D
David Howells 已提交
251
		xas_for_each(&xas, folio, (subreq->start + subreq->len - 1) / PAGE_SIZE) {
252
			/* We might have multiple writes from the same huge
D
David Howells 已提交
253
			 * folio, but we mustn't unlock a folio more than once.
254
			 */
D
David Howells 已提交
255
			if (have_unlocked && folio_index(folio) <= unlocked)
256
				continue;
D
David Howells 已提交
257 258
			unlocked = folio_index(folio);
			folio_end_fscache(folio);
259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274
			have_unlocked = true;
		}
	}

	rcu_read_unlock();
	netfs_rreq_completed(rreq, was_async);
}

static void netfs_rreq_copy_terminated(void *priv, ssize_t transferred_or_error,
				       bool was_async)
{
	struct netfs_read_subrequest *subreq = priv;
	struct netfs_read_request *rreq = subreq->rreq;

	if (IS_ERR_VALUE(transferred_or_error)) {
		netfs_stat(&netfs_n_rh_write_failed);
275 276
		trace_netfs_failure(rreq, subreq, transferred_or_error,
				    netfs_fail_copy_to_cache);
277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326
	} else {
		netfs_stat(&netfs_n_rh_write_done);
	}

	trace_netfs_sreq(subreq, netfs_sreq_trace_write_term);

	/* If we decrement nr_wr_ops to 0, the ref belongs to us. */
	if (atomic_dec_and_test(&rreq->nr_wr_ops))
		netfs_rreq_unmark_after_write(rreq, was_async);

	netfs_put_subrequest(subreq, was_async);
}

/*
 * Perform any outstanding writes to the cache.  We inherit a ref from the
 * caller.
 */
static void netfs_rreq_do_write_to_cache(struct netfs_read_request *rreq)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;
	struct netfs_read_subrequest *subreq, *next, *p;
	struct iov_iter iter;
	int ret;

	trace_netfs_rreq(rreq, netfs_rreq_trace_write);

	/* We don't want terminating writes trying to wake us up whilst we're
	 * still going through the list.
	 */
	atomic_inc(&rreq->nr_wr_ops);

	list_for_each_entry_safe(subreq, p, &rreq->subrequests, rreq_link) {
		if (!test_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags)) {
			list_del_init(&subreq->rreq_link);
			netfs_put_subrequest(subreq, false);
		}
	}

	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
		/* Amalgamate adjacent writes */
		while (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
			next = list_next_entry(subreq, rreq_link);
			if (next->start != subreq->start + subreq->len)
				break;
			subreq->len += next->len;
			list_del_init(&next->rreq_link);
			netfs_put_subrequest(next, false);
		}

		ret = cres->ops->prepare_write(cres, &subreq->start, &subreq->len,
327
					       rreq->i_size, true);
328
		if (ret < 0) {
329
			trace_netfs_failure(rreq, subreq, ret, netfs_fail_prepare_write);
330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357
			trace_netfs_sreq(subreq, netfs_sreq_trace_write_skip);
			continue;
		}

		iov_iter_xarray(&iter, WRITE, &rreq->mapping->i_pages,
				subreq->start, subreq->len);

		atomic_inc(&rreq->nr_wr_ops);
		netfs_stat(&netfs_n_rh_write);
		netfs_get_read_subrequest(subreq);
		trace_netfs_sreq(subreq, netfs_sreq_trace_write);
		cres->ops->write(cres, subreq->start, &iter,
				 netfs_rreq_copy_terminated, subreq);
	}

	/* If we decrement nr_wr_ops to 0, the usage ref belongs to us. */
	if (atomic_dec_and_test(&rreq->nr_wr_ops))
		netfs_rreq_unmark_after_write(rreq, false);
}

static void netfs_rreq_write_to_cache_work(struct work_struct *work)
{
	struct netfs_read_request *rreq =
		container_of(work, struct netfs_read_request, work);

	netfs_rreq_do_write_to_cache(rreq);
}

358
static void netfs_rreq_write_to_cache(struct netfs_read_request *rreq)
359
{
360 361 362
	rreq->work.func = netfs_rreq_write_to_cache_work;
	if (!queue_work(system_unbound_wq, &rreq->work))
		BUG();
363 364
}

365
/*
D
David Howells 已提交
366 367
 * Unlock the folios in a read operation.  We need to set PG_fscache on any
 * folios we're going to write back before we unlock them.
368 369 370 371
 */
static void netfs_rreq_unlock(struct netfs_read_request *rreq)
{
	struct netfs_read_subrequest *subreq;
D
David Howells 已提交
372
	struct folio *folio;
373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397
	unsigned int iopos, account = 0;
	pgoff_t start_page = rreq->start / PAGE_SIZE;
	pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
	bool subreq_failed = false;

	XA_STATE(xas, &rreq->mapping->i_pages, start_page);

	if (test_bit(NETFS_RREQ_FAILED, &rreq->flags)) {
		__clear_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags);
		list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
			__clear_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags);
		}
	}

	/* Walk through the pagecache and the I/O request lists simultaneously.
	 * We may have a mixture of cached and uncached sections and we only
	 * really want to write out the uncached sections.  This is slightly
	 * complicated by the possibility that we might have huge pages with a
	 * mixture inside.
	 */
	subreq = list_first_entry(&rreq->subrequests,
				  struct netfs_read_subrequest, rreq_link);
	iopos = 0;
	subreq_failed = (subreq->error < 0);

D
David Howells 已提交
398 399
	trace_netfs_rreq(rreq, netfs_rreq_trace_unlock);

400
	rcu_read_lock();
D
David Howells 已提交
401 402 403
	xas_for_each(&xas, folio, last_page) {
		unsigned int pgpos = (folio_index(folio) - start_page) * PAGE_SIZE;
		unsigned int pgend = pgpos + folio_size(folio);
404 405 406 407 408 409 410 411
		bool pg_failed = false;

		for (;;) {
			if (!subreq) {
				pg_failed = true;
				break;
			}
			if (test_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags))
D
David Howells 已提交
412
				folio_start_fscache(folio);
413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430
			pg_failed |= subreq_failed;
			if (pgend < iopos + subreq->len)
				break;

			account += subreq->transferred;
			iopos += subreq->len;
			if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
				subreq = list_next_entry(subreq, rreq_link);
				subreq_failed = (subreq->error < 0);
			} else {
				subreq = NULL;
				subreq_failed = false;
			}
			if (pgend == iopos)
				break;
		}

		if (!pg_failed) {
D
David Howells 已提交
431 432
			flush_dcache_folio(folio);
			folio_mark_uptodate(folio);
433 434
		}

D
David Howells 已提交
435 436 437
		if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
			if (folio_index(folio) == rreq->no_unlock_folio &&
			    test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
438 439
				_debug("no unlock");
			else
D
David Howells 已提交
440
				folio_unlock(folio);
441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458
		}
	}
	rcu_read_unlock();

	task_io_account_read(account);
	if (rreq->netfs_ops->done)
		rreq->netfs_ops->done(rreq);
}

/*
 * Handle a short read.
 */
static void netfs_rreq_short_read(struct netfs_read_request *rreq,
				  struct netfs_read_subrequest *subreq)
{
	__clear_bit(NETFS_SREQ_SHORT_READ, &subreq->flags);
	__set_bit(NETFS_SREQ_SEEK_DATA_READ, &subreq->flags);

D
David Howells 已提交
459
	netfs_stat(&netfs_n_rh_short_read);
D
David Howells 已提交
460 461
	trace_netfs_sreq(subreq, netfs_sreq_trace_resubmit_short);

462 463
	netfs_get_read_subrequest(subreq);
	atomic_inc(&rreq->nr_rd_ops);
464
	if (subreq->source == NETFS_READ_FROM_CACHE)
465
		netfs_read_from_cache(rreq, subreq, NETFS_READ_HOLE_CLEAR);
466 467
	else
		netfs_read_from_server(rreq, subreq);
468 469 470 471 472 473 474 475 476 477 478 479
}

/*
 * Resubmit any short or failed operations.  Returns true if we got the rreq
 * ref back.
 */
static bool netfs_rreq_perform_resubmissions(struct netfs_read_request *rreq)
{
	struct netfs_read_subrequest *subreq;

	WARN_ON(in_interrupt());

D
David Howells 已提交
480 481
	trace_netfs_rreq(rreq, netfs_rreq_trace_resubmit);

482 483 484 485 486 487 488 489 490 491 492 493
	/* We don't want terminating submissions trying to wake us up whilst
	 * we're still going through the list.
	 */
	atomic_inc(&rreq->nr_rd_ops);

	__clear_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags);
	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
		if (subreq->error) {
			if (subreq->source != NETFS_READ_FROM_CACHE)
				break;
			subreq->source = NETFS_DOWNLOAD_FROM_SERVER;
			subreq->error = 0;
D
David Howells 已提交
494
			netfs_stat(&netfs_n_rh_download_instead);
D
David Howells 已提交
495
			trace_netfs_sreq(subreq, netfs_sreq_trace_download_instead);
496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511
			netfs_get_read_subrequest(subreq);
			atomic_inc(&rreq->nr_rd_ops);
			netfs_read_from_server(rreq, subreq);
		} else if (test_bit(NETFS_SREQ_SHORT_READ, &subreq->flags)) {
			netfs_rreq_short_read(rreq, subreq);
		}
	}

	/* If we decrement nr_rd_ops to 0, the usage ref belongs to us. */
	if (atomic_dec_and_test(&rreq->nr_rd_ops))
		return true;

	wake_up_var(&rreq->nr_rd_ops);
	return false;
}

512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530
/*
 * Check to see if the data read is still valid.
 */
static void netfs_rreq_is_still_valid(struct netfs_read_request *rreq)
{
	struct netfs_read_subrequest *subreq;

	if (!rreq->netfs_ops->is_still_valid ||
	    rreq->netfs_ops->is_still_valid(rreq))
		return;

	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
		if (subreq->source == NETFS_READ_FROM_CACHE) {
			subreq->error = -ESTALE;
			__set_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags);
		}
	}
}

531 532 533 534 535 536 537 538
/*
 * Assess the state of a read request and decide what to do next.
 *
 * Note that we could be in an ordinary kernel thread, on a workqueue or in
 * softirq context at this point.  We inherit a ref from the caller.
 */
static void netfs_rreq_assess(struct netfs_read_request *rreq, bool was_async)
{
D
David Howells 已提交
539 540
	trace_netfs_rreq(rreq, netfs_rreq_trace_assess);

541
again:
542 543
	netfs_rreq_is_still_valid(rreq);

544 545 546 547 548 549 550 551 552 553 554 555
	if (!test_bit(NETFS_RREQ_FAILED, &rreq->flags) &&
	    test_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags)) {
		if (netfs_rreq_perform_resubmissions(rreq))
			goto again;
		return;
	}

	netfs_rreq_unlock(rreq);

	clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
	wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS);

556
	if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags))
557
		return netfs_rreq_write_to_cache(rreq);
558

559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613
	netfs_rreq_completed(rreq, was_async);
}

static void netfs_rreq_work(struct work_struct *work)
{
	struct netfs_read_request *rreq =
		container_of(work, struct netfs_read_request, work);
	netfs_rreq_assess(rreq, false);
}

/*
 * Handle the completion of all outstanding I/O operations on a read request.
 * We inherit a ref from the caller.
 */
static void netfs_rreq_terminated(struct netfs_read_request *rreq,
				  bool was_async)
{
	if (test_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags) &&
	    was_async) {
		if (!queue_work(system_unbound_wq, &rreq->work))
			BUG();
	} else {
		netfs_rreq_assess(rreq, was_async);
	}
}

/**
 * netfs_subreq_terminated - Note the termination of an I/O operation.
 * @subreq: The I/O request that has terminated.
 * @transferred_or_error: The amount of data transferred or an error code.
 * @was_async: The termination was asynchronous
 *
 * This tells the read helper that a contributory I/O operation has terminated,
 * one way or another, and that it should integrate the results.
 *
 * The caller indicates in @transferred_or_error the outcome of the operation,
 * supplying a positive value to indicate the number of bytes transferred, 0 to
 * indicate a failure to transfer anything that should be retried or a negative
 * error code.  The helper will look after reissuing I/O operations as
 * appropriate and writing downloaded data to the cache.
 *
 * If @was_async is true, the caller might be running in softirq or interrupt
 * context and we can't sleep.
 */
void netfs_subreq_terminated(struct netfs_read_subrequest *subreq,
			     ssize_t transferred_or_error,
			     bool was_async)
{
	struct netfs_read_request *rreq = subreq->rreq;
	int u;

	_enter("[%u]{%llx,%lx},%zd",
	       subreq->debug_index, subreq->start, subreq->flags,
	       transferred_or_error);

D
David Howells 已提交
614 615 616 617 618 619 620 621 622 623 624
	switch (subreq->source) {
	case NETFS_READ_FROM_CACHE:
		netfs_stat(&netfs_n_rh_read_done);
		break;
	case NETFS_DOWNLOAD_FROM_SERVER:
		netfs_stat(&netfs_n_rh_download_done);
		break;
	default:
		break;
	}

625 626
	if (IS_ERR_VALUE(transferred_or_error)) {
		subreq->error = transferred_or_error;
627 628
		trace_netfs_failure(rreq, subreq, transferred_or_error,
				    netfs_fail_read);
629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648
		goto failed;
	}

	if (WARN(transferred_or_error > subreq->len - subreq->transferred,
		 "Subreq overread: R%x[%x] %zd > %zu - %zu",
		 rreq->debug_id, subreq->debug_index,
		 transferred_or_error, subreq->len, subreq->transferred))
		transferred_or_error = subreq->len - subreq->transferred;

	subreq->error = 0;
	subreq->transferred += transferred_or_error;
	if (subreq->transferred < subreq->len)
		goto incomplete;

complete:
	__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
	if (test_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags))
		set_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags);

out:
D
David Howells 已提交
649 650
	trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);

651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682
	/* If we decrement nr_rd_ops to 0, the ref belongs to us. */
	u = atomic_dec_return(&rreq->nr_rd_ops);
	if (u == 0)
		netfs_rreq_terminated(rreq, was_async);
	else if (u == 1)
		wake_up_var(&rreq->nr_rd_ops);

	netfs_put_subrequest(subreq, was_async);
	return;

incomplete:
	if (test_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags)) {
		netfs_clear_unread(subreq);
		subreq->transferred = subreq->len;
		goto complete;
	}

	if (transferred_or_error == 0) {
		if (__test_and_set_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags)) {
			subreq->error = -ENODATA;
			goto failed;
		}
	} else {
		__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
	}

	__set_bit(NETFS_SREQ_SHORT_READ, &subreq->flags);
	set_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags);
	goto out;

failed:
	if (subreq->source == NETFS_READ_FROM_CACHE) {
D
David Howells 已提交
683
		netfs_stat(&netfs_n_rh_read_failed);
684 685
		set_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags);
	} else {
D
David Howells 已提交
686
		netfs_stat(&netfs_n_rh_download_failed);
687 688 689 690 691 692 693 694 695 696 697
		set_bit(NETFS_RREQ_FAILED, &rreq->flags);
		rreq->error = subreq->error;
	}
	goto out;
}
EXPORT_SYMBOL(netfs_subreq_terminated);

static enum netfs_read_source netfs_cache_prepare_read(struct netfs_read_subrequest *subreq,
						       loff_t i_size)
{
	struct netfs_read_request *rreq = subreq->rreq;
698
	struct netfs_cache_resources *cres = &rreq->cache_resources;
699

700 701
	if (cres->ops)
		return cres->ops->prepare_read(subreq, i_size);
702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743
	if (subreq->start >= rreq->i_size)
		return NETFS_FILL_WITH_ZEROES;
	return NETFS_DOWNLOAD_FROM_SERVER;
}

/*
 * Work out what sort of subrequest the next one will be.
 */
static enum netfs_read_source
netfs_rreq_prepare_read(struct netfs_read_request *rreq,
			struct netfs_read_subrequest *subreq)
{
	enum netfs_read_source source;

	_enter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size);

	source = netfs_cache_prepare_read(subreq, rreq->i_size);
	if (source == NETFS_INVALID_READ)
		goto out;

	if (source == NETFS_DOWNLOAD_FROM_SERVER) {
		/* Call out to the netfs to let it shrink the request to fit
		 * its own I/O sizes and boundaries.  If it shinks it here, it
		 * will be called again to make simultaneous calls; if it wants
		 * to make serial calls, it can indicate a short read and then
		 * we will call it again.
		 */
		if (subreq->len > rreq->i_size - subreq->start)
			subreq->len = rreq->i_size - subreq->start;

		if (rreq->netfs_ops->clamp_length &&
		    !rreq->netfs_ops->clamp_length(subreq)) {
			source = NETFS_INVALID_READ;
			goto out;
		}
	}

	if (WARN_ON(subreq->len == 0))
		source = NETFS_INVALID_READ;

out:
	subreq->source = source;
D
David Howells 已提交
744
	trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783
	return source;
}

/*
 * Slice off a piece of a read request and submit an I/O request for it.
 */
static bool netfs_rreq_submit_slice(struct netfs_read_request *rreq,
				    unsigned int *_debug_index)
{
	struct netfs_read_subrequest *subreq;
	enum netfs_read_source source;

	subreq = netfs_alloc_subrequest(rreq);
	if (!subreq)
		return false;

	subreq->debug_index	= (*_debug_index)++;
	subreq->start		= rreq->start + rreq->submitted;
	subreq->len		= rreq->len   - rreq->submitted;

	_debug("slice %llx,%zx,%zx", subreq->start, subreq->len, rreq->submitted);
	list_add_tail(&subreq->rreq_link, &rreq->subrequests);

	/* Call out to the cache to find out what it can do with the remaining
	 * subset.  It tells us in subreq->flags what it decided should be done
	 * and adjusts subreq->len down if the subset crosses a cache boundary.
	 *
	 * Then when we hand the subset, it can choose to take a subset of that
	 * (the starts must coincide), in which case, we go around the loop
	 * again and ask it to download the next piece.
	 */
	source = netfs_rreq_prepare_read(rreq, subreq);
	if (source == NETFS_INVALID_READ)
		goto subreq_failed;

	atomic_inc(&rreq->nr_rd_ops);

	rreq->submitted += subreq->len;

D
David Howells 已提交
784
	trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
785 786 787 788 789 790 791
	switch (source) {
	case NETFS_FILL_WITH_ZEROES:
		netfs_fill_with_zeroes(rreq, subreq);
		break;
	case NETFS_DOWNLOAD_FROM_SERVER:
		netfs_read_from_server(rreq, subreq);
		break;
792
	case NETFS_READ_FROM_CACHE:
793
		netfs_read_from_cache(rreq, subreq, NETFS_READ_HOLE_IGNORE);
794
		break;
795 796 797 798 799 800 801 802 803 804 805 806
	default:
		BUG();
	}

	return true;

subreq_failed:
	rreq->error = subreq->error;
	netfs_put_subrequest(subreq, false);
	return false;
}

807 808 809 810 811 812 813 814 815
static void netfs_cache_expand_readahead(struct netfs_read_request *rreq,
					 loff_t *_start, size_t *_len, loff_t i_size)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;

	if (cres->ops && cres->ops->expand_readahead)
		cres->ops->expand_readahead(cres, _start, _len, i_size);
}

816 817 818
static void netfs_rreq_expand(struct netfs_read_request *rreq,
			      struct readahead_control *ractl)
{
819 820 821 822 823
	/* Give the cache a chance to change the request parameters.  The
	 * resultant request must contain the original region.
	 */
	netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size);

824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843
	/* Give the netfs a chance to change the request parameters.  The
	 * resultant request must contain the original region.
	 */
	if (rreq->netfs_ops->expand_readahead)
		rreq->netfs_ops->expand_readahead(rreq);

	/* Expand the request if the cache wants it to start earlier.  Note
	 * that the expansion may get further extended if the VM wishes to
	 * insert THPs and the preferred start and/or end wind up in the middle
	 * of THPs.
	 *
	 * If this is the case, however, the THP size should be an integer
	 * multiple of the cache granule size, so we get a whole number of
	 * granules to deal with.
	 */
	if (rreq->start  != readahead_pos(ractl) ||
	    rreq->len != readahead_length(ractl)) {
		readahead_expand(ractl, rreq->start, rreq->len);
		rreq->start  = readahead_pos(ractl);
		rreq->len = readahead_length(ractl);
D
David Howells 已提交
844 845 846

		trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
				 netfs_read_trace_expanded);
847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873
	}
}

/**
 * netfs_readahead - Helper to manage a read request
 * @ractl: The description of the readahead request
 * @ops: The network filesystem's operations for the helper to use
 * @netfs_priv: Private netfs data to be retained in the request
 *
 * Fulfil a readahead request by drawing data from the cache if possible, or
 * the netfs if not.  Space beyond the EOF is zero-filled.  Multiple I/O
 * requests from different sources will get munged together.  If necessary, the
 * readahead window can be expanded in either direction to a more convenient
 * alighment for RPC efficiency or to make storage in the cache feasible.
 *
 * The calling netfs must provide a table of operations, only one of which,
 * issue_op, is mandatory.  It may also be passed a private token, which will
 * be retained in rreq->netfs_priv and will be cleaned up by ops->cleanup().
 *
 * This is usable whether or not caching is enabled.
 */
void netfs_readahead(struct readahead_control *ractl,
		     const struct netfs_read_request_ops *ops,
		     void *netfs_priv)
{
	struct netfs_read_request *rreq;
	unsigned int debug_index = 0;
874
	int ret;
875 876 877 878 879 880 881 882 883 884 885 886 887

	_enter("%lx,%x", readahead_index(ractl), readahead_count(ractl));

	if (readahead_count(ractl) == 0)
		goto cleanup;

	rreq = netfs_alloc_read_request(ops, netfs_priv, ractl->file);
	if (!rreq)
		goto cleanup;
	rreq->mapping	= ractl->mapping;
	rreq->start	= readahead_pos(ractl);
	rreq->len	= readahead_length(ractl);

888 889 890 891 892 893
	if (ops->begin_cache_operation) {
		ret = ops->begin_cache_operation(rreq);
		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
			goto cleanup_free;
	}

D
David Howells 已提交
894
	netfs_stat(&netfs_n_rh_readahead);
D
David Howells 已提交
895 896 897
	trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
			 netfs_read_trace_readahead);

898 899 900 901 902 903 904 905 906
	netfs_rreq_expand(rreq, ractl);

	atomic_set(&rreq->nr_rd_ops, 1);
	do {
		if (!netfs_rreq_submit_slice(rreq, &debug_index))
			break;

	} while (rreq->submitted < rreq->len);

D
David Howells 已提交
907
	/* Drop the refs on the folios here rather than in the cache or
908 909
	 * filesystem.  The locks will be dropped in netfs_rreq_unlock().
	 */
D
David Howells 已提交
910 911
	while (readahead_folio(ractl))
		;
912 913 914 915 916 917

	/* If we decrement nr_rd_ops to 0, the ref belongs to us. */
	if (atomic_dec_and_test(&rreq->nr_rd_ops))
		netfs_rreq_assess(rreq, false);
	return;

918 919 920
cleanup_free:
	netfs_put_read_request(rreq, false);
	return;
921 922 923 924 925 926 927 928
cleanup:
	if (netfs_priv)
		ops->cleanup(ractl->mapping, netfs_priv);
	return;
}
EXPORT_SYMBOL(netfs_readahead);

/**
D
David Howells 已提交
929
 * netfs_readpage - Helper to manage a readpage request
930
 * @file: The file to read from
D
David Howells 已提交
931
 * @folio: The folio to read
932 933 934 935 936 937 938 939 940 941 942 943 944 945
 * @ops: The network filesystem's operations for the helper to use
 * @netfs_priv: Private netfs data to be retained in the request
 *
 * Fulfil a readpage request by drawing data from the cache if possible, or the
 * netfs if not.  Space beyond the EOF is zero-filled.  Multiple I/O requests
 * from different sources will get munged together.
 *
 * The calling netfs must provide a table of operations, only one of which,
 * issue_op, is mandatory.  It may also be passed a private token, which will
 * be retained in rreq->netfs_priv and will be cleaned up by ops->cleanup().
 *
 * This is usable whether or not caching is enabled.
 */
int netfs_readpage(struct file *file,
D
David Howells 已提交
946
		   struct folio *folio,
947 948 949 950 951 952 953
		   const struct netfs_read_request_ops *ops,
		   void *netfs_priv)
{
	struct netfs_read_request *rreq;
	unsigned int debug_index = 0;
	int ret;

D
David Howells 已提交
954
	_enter("%lx", folio_index(folio));
955 956 957 958

	rreq = netfs_alloc_read_request(ops, netfs_priv, file);
	if (!rreq) {
		if (netfs_priv)
J
Jeffle Xu 已提交
959
			ops->cleanup(folio_file_mapping(folio), netfs_priv);
D
David Howells 已提交
960
		folio_unlock(folio);
961 962
		return -ENOMEM;
	}
D
David Howells 已提交
963 964 965
	rreq->mapping	= folio_file_mapping(folio);
	rreq->start	= folio_file_pos(folio);
	rreq->len	= folio_size(folio);
966

967 968 969
	if (ops->begin_cache_operation) {
		ret = ops->begin_cache_operation(rreq);
		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) {
D
David Howells 已提交
970
			folio_unlock(folio);
971 972 973 974
			goto out;
		}
	}

D
David Howells 已提交
975
	netfs_stat(&netfs_n_rh_readpage);
D
David Howells 已提交
976 977
	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);

978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996
	netfs_get_read_request(rreq);

	atomic_set(&rreq->nr_rd_ops, 1);
	do {
		if (!netfs_rreq_submit_slice(rreq, &debug_index))
			break;

	} while (rreq->submitted < rreq->len);

	/* Keep nr_rd_ops incremented so that the ref always belongs to us, and
	 * the service code isn't punted off to a random thread pool to
	 * process.
	 */
	do {
		wait_var_event(&rreq->nr_rd_ops, atomic_read(&rreq->nr_rd_ops) == 1);
		netfs_rreq_assess(rreq, false);
	} while (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags));

	ret = rreq->error;
997 998
	if (ret == 0 && rreq->submitted < rreq->len) {
		trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_readpage);
999
		ret = -EIO;
1000
	}
1001
out:
1002 1003 1004 1005
	netfs_put_read_request(rreq, false);
	return ret;
}
EXPORT_SYMBOL(netfs_readpage);
D
David Howells 已提交
1006

D
David Howells 已提交
1007 1008
/*
 * Prepare a folio for writing without reading first
D
David Howells 已提交
1009
 * @folio: The folio being prepared
1010 1011 1012 1013
 * @pos: starting position for the write
 * @len: length of write
 *
 * In some cases, write_begin doesn't need to read at all:
D
David Howells 已提交
1014 1015 1016
 * - full folio write
 * - write that lies in a folio that is completely beyond EOF
 * - write that covers the folio from start to EOF or beyond it
1017 1018
 *
 * If any of these criteria are met, then zero out the unwritten parts
D
David Howells 已提交
1019
 * of the folio and return true. Otherwise, return false.
1020
 */
D
David Howells 已提交
1021
static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len)
D
David Howells 已提交
1022
{
D
David Howells 已提交
1023
	struct inode *inode = folio_inode(folio);
1024
	loff_t i_size = i_size_read(inode);
D
David Howells 已提交
1025
	size_t offset = offset_in_folio(folio, pos);
1026

D
David Howells 已提交
1027 1028
	/* Full folio write */
	if (offset == 0 && len >= folio_size(folio))
1029 1030
		return true;

D
David Howells 已提交
1031
	/* pos beyond last folio in the file */
1032 1033 1034
	if (pos - offset >= i_size)
		goto zero_out;

D
David Howells 已提交
1035
	/* Write that covers from the start of the folio to EOF or beyond */
1036 1037
	if (offset == 0 && (pos + len) >= i_size)
		goto zero_out;
D
David Howells 已提交
1038

1039 1040
	return false;
zero_out:
D
David Howells 已提交
1041
	zero_user_segments(&folio->page, 0, offset, offset + len, folio_size(folio));
1042
	return true;
D
David Howells 已提交
1043 1044 1045 1046 1047 1048 1049
}

/**
 * netfs_write_begin - Helper to prepare for writing
 * @file: The file to read from
 * @mapping: The mapping to read from
 * @pos: File position at which the write will begin
D
David Howells 已提交
1050 1051 1052
 * @len: The length of the write (may extend beyond the end of the folio chosen)
 * @aop_flags: AOP_* flags
 * @_folio: Where to put the resultant folio
D
David Howells 已提交
1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067
 * @_fsdata: Place for the netfs to store a cookie
 * @ops: The network filesystem's operations for the helper to use
 * @netfs_priv: Private netfs data to be retained in the request
 *
 * Pre-read data for a write-begin request by drawing data from the cache if
 * possible, or the netfs if not.  Space beyond the EOF is zero-filled.
 * Multiple I/O requests from different sources will get munged together.  If
 * necessary, the readahead window can be expanded in either direction to a
 * more convenient alighment for RPC efficiency or to make storage in the cache
 * feasible.
 *
 * The calling netfs must provide a table of operations, only one of which,
 * issue_op, is mandatory.
 *
 * The check_write_begin() operation can be provided to check for and flush
D
David Howells 已提交
1068
 * conflicting writes once the folio is grabbed and locked.  It is passed a
D
David Howells 已提交
1069 1070
 * pointer to the fsdata cookie that gets returned to the VM to be passed to
 * write_end.  It is permitted to sleep.  It should return 0 if the request
D
David Howells 已提交
1071 1072
 * should go ahead; unlock the folio and return -EAGAIN to cause the folio to
 * be regot; or return an error.
D
David Howells 已提交
1073 1074 1075 1076
 *
 * This is usable whether or not caching is enabled.
 */
int netfs_write_begin(struct file *file, struct address_space *mapping,
D
David Howells 已提交
1077 1078
		      loff_t pos, unsigned int len, unsigned int aop_flags,
		      struct folio **_folio, void **_fsdata,
D
David Howells 已提交
1079 1080 1081 1082
		      const struct netfs_read_request_ops *ops,
		      void *netfs_priv)
{
	struct netfs_read_request *rreq;
D
David Howells 已提交
1083
	struct folio *folio;
D
David Howells 已提交
1084
	struct inode *inode = file_inode(file);
D
David Howells 已提交
1085
	unsigned int debug_index = 0, fgp_flags;
D
David Howells 已提交
1086 1087 1088 1089 1090 1091
	pgoff_t index = pos >> PAGE_SHIFT;
	int ret;

	DEFINE_READAHEAD(ractl, file, NULL, mapping, index);

retry:
D
David Howells 已提交
1092 1093 1094 1095 1096 1097
	fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;
	if (aop_flags & AOP_FLAG_NOFS)
		fgp_flags |= FGP_NOFS;
	folio = __filemap_get_folio(mapping, index, fgp_flags,
				    mapping_gfp_mask(mapping));
	if (!folio)
D
David Howells 已提交
1098 1099 1100 1101
		return -ENOMEM;

	if (ops->check_write_begin) {
		/* Allow the netfs (eg. ceph) to flush conflicts. */
D
David Howells 已提交
1102
		ret = ops->check_write_begin(file, pos, len, folio, _fsdata);
D
David Howells 已提交
1103
		if (ret < 0) {
1104
			trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin);
D
David Howells 已提交
1105 1106 1107 1108 1109 1110
			if (ret == -EAGAIN)
				goto retry;
			goto error;
		}
	}

D
David Howells 已提交
1111 1112
	if (folio_test_uptodate(folio))
		goto have_folio;
D
David Howells 已提交
1113 1114 1115 1116 1117 1118

	/* If the page is beyond the EOF, we want to clear it - unless it's
	 * within the cache granule containing the EOF, in which case we need
	 * to preload the granule.
	 */
	if (!ops->is_cache_enabled(inode) &&
D
David Howells 已提交
1119
	    netfs_skip_folio_read(folio, pos, len)) {
D
David Howells 已提交
1120
		netfs_stat(&netfs_n_rh_write_zskip);
D
David Howells 已提交
1121
		goto have_folio_no_wait;
D
David Howells 已提交
1122 1123 1124 1125 1126 1127
	}

	ret = -ENOMEM;
	rreq = netfs_alloc_read_request(ops, netfs_priv, file);
	if (!rreq)
		goto error;
D
David Howells 已提交
1128 1129 1130 1131 1132
	rreq->mapping		= folio_file_mapping(folio);
	rreq->start		= folio_file_pos(folio);
	rreq->len		= folio_size(folio);
	rreq->no_unlock_folio	= folio_index(folio);
	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
D
David Howells 已提交
1133 1134
	netfs_priv = NULL;

1135 1136 1137 1138 1139 1140
	if (ops->begin_cache_operation) {
		ret = ops->begin_cache_operation(rreq);
		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
			goto error_put;
	}

D
David Howells 已提交
1141 1142 1143 1144 1145 1146
	netfs_stat(&netfs_n_rh_write_begin);
	trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);

	/* Expand the request to meet caching requirements and download
	 * preferences.
	 */
D
David Howells 已提交
1147
	ractl._nr_pages = folio_nr_pages(folio);
D
David Howells 已提交
1148 1149 1150
	netfs_rreq_expand(rreq, &ractl);
	netfs_get_read_request(rreq);

D
David Howells 已提交
1151 1152 1153 1154
	/* We hold the folio locks, so we can drop the references */
	folio_get(folio);
	while (readahead_folio(&ractl))
		;
D
David Howells 已提交
1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175

	atomic_set(&rreq->nr_rd_ops, 1);
	do {
		if (!netfs_rreq_submit_slice(rreq, &debug_index))
			break;

	} while (rreq->submitted < rreq->len);

	/* Keep nr_rd_ops incremented so that the ref always belongs to us, and
	 * the service code isn't punted off to a random thread pool to
	 * process.
	 */
	for (;;) {
		wait_var_event(&rreq->nr_rd_ops, atomic_read(&rreq->nr_rd_ops) == 1);
		netfs_rreq_assess(rreq, false);
		if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags))
			break;
		cond_resched();
	}

	ret = rreq->error;
1176 1177
	if (ret == 0 && rreq->submitted < rreq->len) {
		trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_write_begin);
D
David Howells 已提交
1178
		ret = -EIO;
1179
	}
D
David Howells 已提交
1180 1181 1182 1183
	netfs_put_read_request(rreq, false);
	if (ret < 0)
		goto error;

D
David Howells 已提交
1184 1185
have_folio:
	ret = folio_wait_fscache_killable(folio);
D
David Howells 已提交
1186 1187
	if (ret < 0)
		goto error;
D
David Howells 已提交
1188
have_folio_no_wait:
D
David Howells 已提交
1189
	if (netfs_priv)
J
Jeffle Xu 已提交
1190
		ops->cleanup(mapping, netfs_priv);
D
David Howells 已提交
1191
	*_folio = folio;
D
David Howells 已提交
1192 1193 1194 1195 1196 1197
	_leave(" = 0");
	return 0;

error_put:
	netfs_put_read_request(rreq, false);
error:
D
David Howells 已提交
1198 1199
	folio_unlock(folio);
	folio_put(folio);
D
David Howells 已提交
1200
	if (netfs_priv)
J
Jeffle Xu 已提交
1201
		ops->cleanup(mapping, netfs_priv);
D
David Howells 已提交
1202 1203 1204 1205
	_leave(" = %d", ret);
	return ret;
}
EXPORT_SYMBOL(netfs_write_begin);