// SPDX-License-Identifier: GPL-2.0-or-later
/* Network filesystem high-level read support.
 *
 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/module.h>
#include <linux/export.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/uio.h>
#include <linux/sched/mm.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/netfs.h>
#include "internal.h"
#define CREATE_TRACE_POINTS
#include <trace/events/netfs.h>

MODULE_DESCRIPTION("Network fs support");
MODULE_AUTHOR("Red Hat, Inc.");
MODULE_LICENSE("GPL");

unsigned netfs_debug;
module_param_named(debug, netfs_debug, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(netfs_debug, "Netfs support debugging mask");

static void netfs_rreq_work(struct work_struct *);
static void __netfs_put_subrequest(struct netfs_read_subrequest *, bool);

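/*
 * Release a ref on a subrequest, destroying it when the refcount hits zero.
 */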
static void netfs_put_subrequest(struct netfs_read_subrequest *subreq,
				 bool was_async)
{
	if (refcount_dec_and_test(&subreq->usage))
		__netfs_put_subrequest(subreq, was_async);
}

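/*
 * Allocate and partially initialise a read request.  The caller fills in the
 * mapping, start and length before submitting slices of it.
 */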
static struct netfs_read_request *netfs_alloc_read_request(
	const struct netfs_read_request_ops *ops, void *netfs_priv,
	struct file *file)
{
	static atomic_t debug_ids;
	struct netfs_read_request *rreq;

	rreq = kzalloc(sizeof(struct netfs_read_request), GFP_KERNEL);
	if (rreq) {
		rreq->netfs_ops	= ops;
		rreq->netfs_priv = netfs_priv;
		rreq->inode	= file_inode(file);
		rreq->i_size	= i_size_read(rreq->inode);
		rreq->debug_id	= atomic_inc_return(&debug_ids);
		INIT_LIST_HEAD(&rreq->subrequests);
		INIT_WORK(&rreq->work, netfs_rreq_work);
		refcount_set(&rreq->usage, 1);
		__set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
		ops->init_rreq(rreq, file);
		netfs_stat(&netfs_n_rh_rreq);
	}

	return rreq;
}

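/* Get an additional ref on a read request. */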
static void netfs_get_read_request(struct netfs_read_request *rreq)
{
	refcount_inc(&rreq->usage);
}

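/*
 * Unlink and release all the subrequests attached to a read request.
 */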
static void netfs_rreq_clear_subreqs(struct netfs_read_request *rreq,
				     bool was_async)
{
	struct netfs_read_subrequest *subreq;

	while (!list_empty(&rreq->subrequests)) {
		subreq = list_first_entry(&rreq->subrequests,
					  struct netfs_read_subrequest, rreq_link);
		list_del(&subreq->rreq_link);
		netfs_put_subrequest(subreq, was_async);
	}
}

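/*
 * Free up a read request and the subrequests it holds, giving the netfs a
 * chance to clean up its private data.  This may be run from a worker if the
 * last ref was put in a context that couldn't sleep.
 */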
static void netfs_free_read_request(struct work_struct *work)
{
	struct netfs_read_request *rreq =
		container_of(work, struct netfs_read_request, work);
	netfs_rreq_clear_subreqs(rreq, false);
	if (rreq->netfs_priv)
		rreq->netfs_ops->cleanup(rreq->mapping, rreq->netfs_priv);
	trace_netfs_rreq(rreq, netfs_rreq_trace_free);
	if (rreq->cache_resources.ops)
		rreq->cache_resources.ops->end_operation(&rreq->cache_resources);
	kfree(rreq);
	netfs_stat_d(&netfs_n_rh_rreq);
}

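/*
 * Drop a ref on a read request, punting destruction to a worker if the last
 * ref was put asynchronously (e.g. from softirq context).
 */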
static void netfs_put_read_request(struct netfs_read_request *rreq, bool was_async)
{
	if (refcount_dec_and_test(&rreq->usage)) {
		if (was_async) {
			rreq->work.func = netfs_free_read_request;
			if (!queue_work(system_unbound_wq, &rreq->work))
				BUG();
		} else {
			netfs_free_read_request(&rreq->work);
		}
	}
}

/*
 * Allocate and partially initialise an I/O subrequest structure.
 */
static struct netfs_read_subrequest *netfs_alloc_subrequest(
	struct netfs_read_request *rreq)
{
	struct netfs_read_subrequest *subreq;

	subreq = kzalloc(sizeof(struct netfs_read_subrequest), GFP_KERNEL);
	if (subreq) {
		INIT_LIST_HEAD(&subreq->rreq_link);
		refcount_set(&subreq->usage, 2);
		subreq->rreq = rreq;
		netfs_get_read_request(rreq);
		netfs_stat(&netfs_n_rh_sreq);
	}

	return subreq;
}

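/* Get an additional ref on a subrequest. */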
static void netfs_get_read_subrequest(struct netfs_read_subrequest *subreq)
{
	refcount_inc(&subreq->usage);
}

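/*
 * Destroy a subrequest and drop the ref it holds on its read request.
 */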
static void __netfs_put_subrequest(struct netfs_read_subrequest *subreq,
				   bool was_async)
{
	struct netfs_read_request *rreq = subreq->rreq;

	trace_netfs_sreq(subreq, netfs_sreq_trace_free);
	kfree(subreq);
	netfs_stat_d(&netfs_n_rh_sreq);
	netfs_put_read_request(rreq, was_async);
}

/*
 * Clear the unread part of an I/O request.
 */
static void netfs_clear_unread(struct netfs_read_subrequest *subreq)
{
	struct iov_iter iter;

	iov_iter_xarray(&iter, READ, &subreq->rreq->mapping->i_pages,
			subreq->start + subreq->transferred,
			subreq->len   - subreq->transferred);
	iov_iter_zero(iov_iter_count(&iter), &iter);
}

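/*
 * Handle the completion of a read from the cache, passing the result on to
 * the common subrequest termination handler.
 */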
static void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error,
					bool was_async)
{
	struct netfs_read_subrequest *subreq = priv;

	netfs_subreq_terminated(subreq, transferred_or_error, was_async);
}

/*
 * Issue a read against the cache.
 * - Eats the caller's ref on subreq.
 */
static void netfs_read_from_cache(struct netfs_read_request *rreq,
				  struct netfs_read_subrequest *subreq,
				  bool seek_data)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;
	struct iov_iter iter;

	netfs_stat(&netfs_n_rh_read);
	iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages,
			subreq->start + subreq->transferred,
			subreq->len   - subreq->transferred);

	cres->ops->read(cres, subreq->start, &iter, seek_data,
			netfs_cache_read_terminated, subreq);
}

/*
 * Fill a subrequest region with zeroes.
 */
static void netfs_fill_with_zeroes(struct netfs_read_request *rreq,
				   struct netfs_read_subrequest *subreq)
{
	netfs_stat(&netfs_n_rh_zero);
	__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
	netfs_subreq_terminated(subreq, 0, false);
}

/*
 * Ask the netfs to issue a read request to the server for us.
 *
 * The netfs is expected to read from subreq->start + subreq->transferred to
 * subreq->start + subreq->len - 1.  It may not backtrack and write data into the
 * buffer prior to the transferred point as it might clobber dirty data
 * obtained from the cache.
 *
 * Alternatively, the netfs is allowed to indicate one of two things:
 *
 * - NETFS_SREQ_SHORT_READ: A short read - it will get called again to try and
 *   make progress.
 *
 * - NETFS_SREQ_CLEAR_TAIL: A short read - the rest of the buffer will be
 *   cleared.
 */
static void netfs_read_from_server(struct netfs_read_request *rreq,
				   struct netfs_read_subrequest *subreq)
{
	netfs_stat(&netfs_n_rh_download);
	rreq->netfs_ops->issue_op(subreq);
}

/*
 * Release those waiting.
 */
static void netfs_rreq_completed(struct netfs_read_request *rreq, bool was_async)
{
	trace_netfs_rreq(rreq, netfs_rreq_trace_done);
	netfs_rreq_clear_subreqs(rreq, was_async);
	netfs_put_read_request(rreq, was_async);
}

/*
 * Deal with the completion of writing the data to the cache.  We have to clear
 * the PG_fscache bits on the folios involved and release the caller's ref.
 *
 * May be called in softirq mode and we inherit a ref from the caller.
 */
static void netfs_rreq_unmark_after_write(struct netfs_read_request *rreq,
					  bool was_async)
{
	struct netfs_read_subrequest *subreq;
	struct folio *folio;
	pgoff_t unlocked = 0;
	bool have_unlocked = false;

	rcu_read_lock();

	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
		XA_STATE(xas, &rreq->mapping->i_pages, subreq->start / PAGE_SIZE);

		xas_for_each(&xas, folio, (subreq->start + subreq->len - 1) / PAGE_SIZE) {
			/* We might have multiple writes from the same huge
			 * folio, but we mustn't unlock a folio more than once.
			 */
			if (have_unlocked && folio_index(folio) <= unlocked)
				continue;
			unlocked = folio_index(folio);
			folio_end_fscache(folio);
			have_unlocked = true;
		}
	}

	rcu_read_unlock();
	netfs_rreq_completed(rreq, was_async);
}

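/*
 * Handle the termination of a write to the cache for a subrequest's data.
 */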
static void netfs_rreq_copy_terminated(void *priv, ssize_t transferred_or_error,
				       bool was_async)
{
	struct netfs_read_subrequest *subreq = priv;
	struct netfs_read_request *rreq = subreq->rreq;

	if (IS_ERR_VALUE(transferred_or_error)) {
		netfs_stat(&netfs_n_rh_write_failed);
		trace_netfs_failure(rreq, subreq, transferred_or_error,
				    netfs_fail_copy_to_cache);
	} else {
		netfs_stat(&netfs_n_rh_write_done);
	}

	trace_netfs_sreq(subreq, netfs_sreq_trace_write_term);

	/* If we decrement nr_wr_ops to 0, the ref belongs to us. */
	if (atomic_dec_and_test(&rreq->nr_wr_ops))
		netfs_rreq_unmark_after_write(rreq, was_async);

	netfs_put_subrequest(subreq, was_async);
}

/*
 * Perform any outstanding writes to the cache.  We inherit a ref from the
 * caller.
 */
static void netfs_rreq_do_write_to_cache(struct netfs_read_request *rreq)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;
	struct netfs_read_subrequest *subreq, *next, *p;
	struct iov_iter iter;
	int ret;

	trace_netfs_rreq(rreq, netfs_rreq_trace_write);

	/* We don't want terminating writes trying to wake us up whilst we're
	 * still going through the list.
	 */
	atomic_inc(&rreq->nr_wr_ops);

	list_for_each_entry_safe(subreq, p, &rreq->subrequests, rreq_link) {
		if (!test_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags)) {
			list_del_init(&subreq->rreq_link);
			netfs_put_subrequest(subreq, false);
		}
	}

	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
		/* Amalgamate adjacent writes */
		while (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
			next = list_next_entry(subreq, rreq_link);
			if (next->start != subreq->start + subreq->len)
				break;
			subreq->len += next->len;
			list_del_init(&next->rreq_link);
			netfs_put_subrequest(next, false);
		}

		ret = cres->ops->prepare_write(cres, &subreq->start, &subreq->len,
					       rreq->i_size);
		if (ret < 0) {
			trace_netfs_failure(rreq, subreq, ret, netfs_fail_prepare_write);
			trace_netfs_sreq(subreq, netfs_sreq_trace_write_skip);
			continue;
		}

		iov_iter_xarray(&iter, WRITE, &rreq->mapping->i_pages,
				subreq->start, subreq->len);

		atomic_inc(&rreq->nr_wr_ops);
		netfs_stat(&netfs_n_rh_write);
		netfs_get_read_subrequest(subreq);
		trace_netfs_sreq(subreq, netfs_sreq_trace_write);
		cres->ops->write(cres, subreq->start, &iter,
				 netfs_rreq_copy_terminated, subreq);
	}

	/* If we decrement nr_wr_ops to 0, the usage ref belongs to us. */
	if (atomic_dec_and_test(&rreq->nr_wr_ops))
		netfs_rreq_unmark_after_write(rreq, false);
}

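/*
 * Worker to write data to the cache in process context.
 */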
static void netfs_rreq_write_to_cache_work(struct work_struct *work)
{
	struct netfs_read_request *rreq =
		container_of(work, struct netfs_read_request, work);

	netfs_rreq_do_write_to_cache(rreq);
}

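/*
 * Begin writing to the cache, punting to a worker if we were called in a
 * context that isn't allowed to sleep.
 */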
static void netfs_rreq_write_to_cache(struct netfs_read_request *rreq,
				      bool was_async)
{
	if (was_async) {
		rreq->work.func = netfs_rreq_write_to_cache_work;
		if (!queue_work(system_unbound_wq, &rreq->work))
			BUG();
	} else {
		netfs_rreq_do_write_to_cache(rreq);
	}
}

/*
 * Unlock the folios in a read operation.  We need to set PG_fscache on any
 * folios we're going to write back before we unlock them.
 */
static void netfs_rreq_unlock(struct netfs_read_request *rreq)
{
	struct netfs_read_subrequest *subreq;
	struct folio *folio;
	unsigned int iopos, account = 0;
	pgoff_t start_page = rreq->start / PAGE_SIZE;
	pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
	bool subreq_failed = false;

	XA_STATE(xas, &rreq->mapping->i_pages, start_page);

	if (test_bit(NETFS_RREQ_FAILED, &rreq->flags)) {
		__clear_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags);
		list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
			__clear_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags);
		}
	}

	/* Walk through the pagecache and the I/O request lists simultaneously.
	 * We may have a mixture of cached and uncached sections and we only
	 * really want to write out the uncached sections.  This is slightly
	 * complicated by the possibility that we might have huge pages with a
	 * mixture inside.
	 */
	subreq = list_first_entry(&rreq->subrequests,
				  struct netfs_read_subrequest, rreq_link);
	iopos = 0;
	subreq_failed = (subreq->error < 0);

	trace_netfs_rreq(rreq, netfs_rreq_trace_unlock);

	rcu_read_lock();
	xas_for_each(&xas, folio, last_page) {
		unsigned int pgpos = (folio_index(folio) - start_page) * PAGE_SIZE;
		unsigned int pgend = pgpos + folio_size(folio);
		bool pg_failed = false;

		for (;;) {
			if (!subreq) {
				pg_failed = true;
				break;
			}
			if (test_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags))
				folio_start_fscache(folio);
			pg_failed |= subreq_failed;
			if (pgend < iopos + subreq->len)
				break;

			account += subreq->transferred;
			iopos += subreq->len;
			if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
				subreq = list_next_entry(subreq, rreq_link);
				subreq_failed = (subreq->error < 0);
			} else {
				subreq = NULL;
				subreq_failed = false;
			}
			if (pgend == iopos)
				break;
		}

		if (!pg_failed) {
			flush_dcache_folio(folio);
			folio_mark_uptodate(folio);
		}

		if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
			if (folio_index(folio) == rreq->no_unlock_folio &&
			    test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
				_debug("no unlock");
			else
				folio_unlock(folio);
		}
	}
	rcu_read_unlock();

	task_io_account_read(account);
	if (rreq->netfs_ops->done)
		rreq->netfs_ops->done(rreq);
}

/*
 * Handle a short read.
 */
static void netfs_rreq_short_read(struct netfs_read_request *rreq,
				  struct netfs_read_subrequest *subreq)
{
	__clear_bit(NETFS_SREQ_SHORT_READ, &subreq->flags);
	__set_bit(NETFS_SREQ_SEEK_DATA_READ, &subreq->flags);

	netfs_stat(&netfs_n_rh_short_read);
	trace_netfs_sreq(subreq, netfs_sreq_trace_resubmit_short);

	netfs_get_read_subrequest(subreq);
	atomic_inc(&rreq->nr_rd_ops);
	if (subreq->source == NETFS_READ_FROM_CACHE)
		netfs_read_from_cache(rreq, subreq, true);
	else
		netfs_read_from_server(rreq, subreq);
}

/*
 * Resubmit any short or failed operations.  Returns true if we got the rreq
 * ref back.
 */
static bool netfs_rreq_perform_resubmissions(struct netfs_read_request *rreq)
{
	struct netfs_read_subrequest *subreq;

	WARN_ON(in_interrupt());

	trace_netfs_rreq(rreq, netfs_rreq_trace_resubmit);

	/* We don't want terminating submissions trying to wake us up whilst
	 * we're still going through the list.
	 */
	atomic_inc(&rreq->nr_rd_ops);

	__clear_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags);
	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
		if (subreq->error) {
			if (subreq->source != NETFS_READ_FROM_CACHE)
				break;
			subreq->source = NETFS_DOWNLOAD_FROM_SERVER;
			subreq->error = 0;
			netfs_stat(&netfs_n_rh_download_instead);
			trace_netfs_sreq(subreq, netfs_sreq_trace_download_instead);
			netfs_get_read_subrequest(subreq);
			atomic_inc(&rreq->nr_rd_ops);
			netfs_read_from_server(rreq, subreq);
		} else if (test_bit(NETFS_SREQ_SHORT_READ, &subreq->flags)) {
			netfs_rreq_short_read(rreq, subreq);
		}
	}

	/* If we decrement nr_rd_ops to 0, the usage ref belongs to us. */
	if (atomic_dec_and_test(&rreq->nr_rd_ops))
		return true;

	wake_up_var(&rreq->nr_rd_ops);
	return false;
}

/*
 * Check to see if the data read is still valid.
 */
static void netfs_rreq_is_still_valid(struct netfs_read_request *rreq)
{
	struct netfs_read_subrequest *subreq;

	if (!rreq->netfs_ops->is_still_valid ||
	    rreq->netfs_ops->is_still_valid(rreq))
		return;

	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
		if (subreq->source == NETFS_READ_FROM_CACHE) {
			subreq->error = -ESTALE;
			__set_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags);
		}
	}
}

/*
 * Assess the state of a read request and decide what to do next.
 *
 * Note that we could be in an ordinary kernel thread, on a workqueue or in
 * softirq context at this point.  We inherit a ref from the caller.
 */
static void netfs_rreq_assess(struct netfs_read_request *rreq, bool was_async)
{
	trace_netfs_rreq(rreq, netfs_rreq_trace_assess);

again:
	netfs_rreq_is_still_valid(rreq);

	if (!test_bit(NETFS_RREQ_FAILED, &rreq->flags) &&
	    test_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags)) {
		if (netfs_rreq_perform_resubmissions(rreq))
			goto again;
		return;
	}

	netfs_rreq_unlock(rreq);

	clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
	wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS);

	if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags))
		return netfs_rreq_write_to_cache(rreq, was_async);

	netfs_rreq_completed(rreq, was_async);
}

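/*
 * Worker to assess a read request in process context.
 */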
static void netfs_rreq_work(struct work_struct *work)
{
	struct netfs_read_request *rreq =
		container_of(work, struct netfs_read_request, work);
	netfs_rreq_assess(rreq, false);
}

/*
 * Handle the completion of all outstanding I/O operations on a read request.
 * We inherit a ref from the caller.
 */
static void netfs_rreq_terminated(struct netfs_read_request *rreq,
				  bool was_async)
{
	if (test_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags) &&
	    was_async) {
		if (!queue_work(system_unbound_wq, &rreq->work))
			BUG();
	} else {
		netfs_rreq_assess(rreq, was_async);
	}
}

/**
 * netfs_subreq_terminated - Note the termination of an I/O operation.
 * @subreq: The I/O request that has terminated.
 * @transferred_or_error: The amount of data transferred or an error code.
 * @was_async: The termination was asynchronous
 *
 * This tells the read helper that a contributory I/O operation has terminated,
 * one way or another, and that it should integrate the results.
 *
 * The caller indicates in @transferred_or_error the outcome of the operation,
 * supplying a positive value to indicate the number of bytes transferred, 0 to
 * indicate a retryable failure to transfer anything, or a negative error code.
 * The helper will look after reissuing I/O operations as appropriate and
 * writing downloaded data to the cache.
 *
 * If @was_async is true, the caller might be running in softirq or interrupt
 * context and we can't sleep.
 */
void netfs_subreq_terminated(struct netfs_read_subrequest *subreq,
			     ssize_t transferred_or_error,
			     bool was_async)
{
	struct netfs_read_request *rreq = subreq->rreq;
	int u;

	_enter("[%u]{%llx,%lx},%zd",
	       subreq->debug_index, subreq->start, subreq->flags,
	       transferred_or_error);

	switch (subreq->source) {
	case NETFS_READ_FROM_CACHE:
		netfs_stat(&netfs_n_rh_read_done);
		break;
	case NETFS_DOWNLOAD_FROM_SERVER:
		netfs_stat(&netfs_n_rh_download_done);
		break;
	default:
		break;
	}

	if (IS_ERR_VALUE(transferred_or_error)) {
		subreq->error = transferred_or_error;
		trace_netfs_failure(rreq, subreq, transferred_or_error,
				    netfs_fail_read);
		goto failed;
	}

	if (WARN(transferred_or_error > subreq->len - subreq->transferred,
		 "Subreq overread: R%x[%x] %zd > %zu - %zu",
		 rreq->debug_id, subreq->debug_index,
		 transferred_or_error, subreq->len, subreq->transferred))
		transferred_or_error = subreq->len - subreq->transferred;

	subreq->error = 0;
	subreq->transferred += transferred_or_error;
	if (subreq->transferred < subreq->len)
		goto incomplete;

complete:
	__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
	if (test_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags))
		set_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags);

out:
	trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);

	/* If we decrement nr_rd_ops to 0, the ref belongs to us. */
	u = atomic_dec_return(&rreq->nr_rd_ops);
	if (u == 0)
		netfs_rreq_terminated(rreq, was_async);
	else if (u == 1)
		wake_up_var(&rreq->nr_rd_ops);

	netfs_put_subrequest(subreq, was_async);
	return;

incomplete:
	if (test_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags)) {
		netfs_clear_unread(subreq);
		subreq->transferred = subreq->len;
		goto complete;
	}

	if (transferred_or_error == 0) {
		if (__test_and_set_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags)) {
			subreq->error = -ENODATA;
			goto failed;
		}
	} else {
		__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
	}

	__set_bit(NETFS_SREQ_SHORT_READ, &subreq->flags);
	set_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags);
	goto out;

failed:
	if (subreq->source == NETFS_READ_FROM_CACHE) {
		netfs_stat(&netfs_n_rh_read_failed);
		set_bit(NETFS_RREQ_INCOMPLETE_IO, &rreq->flags);
	} else {
		netfs_stat(&netfs_n_rh_download_failed);
		set_bit(NETFS_RREQ_FAILED, &rreq->flags);
		rreq->error = subreq->error;
	}
	goto out;
}
EXPORT_SYMBOL(netfs_subreq_terminated);

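/*
 * Work out the initial source of a read: the cache if one is available,
 * zero-fill for a region starting at or beyond the EOF, or a download from
 * the server otherwise.
 */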
static enum netfs_read_source netfs_cache_prepare_read(struct netfs_read_subrequest *subreq,
						       loff_t i_size)
{
	struct netfs_read_request *rreq = subreq->rreq;
	struct netfs_cache_resources *cres = &rreq->cache_resources;

	if (cres->ops)
		return cres->ops->prepare_read(subreq, i_size);
	if (subreq->start >= rreq->i_size)
		return NETFS_FILL_WITH_ZEROES;
	return NETFS_DOWNLOAD_FROM_SERVER;
}

/*
 * Work out what sort of subrequest the next one will be.
 */
static enum netfs_read_source
netfs_rreq_prepare_read(struct netfs_read_request *rreq,
			struct netfs_read_subrequest *subreq)
{
	enum netfs_read_source source;

	_enter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size);

	source = netfs_cache_prepare_read(subreq, rreq->i_size);
	if (source == NETFS_INVALID_READ)
		goto out;

	if (source == NETFS_DOWNLOAD_FROM_SERVER) {
		/* Call out to the netfs to let it shrink the request to fit
		 * its own I/O sizes and boundaries.  If it shrinks it here, it
		 * will be called again to make simultaneous calls; if it wants
		 * to make serial calls, it can indicate a short read and then
		 * we will call it again.
		 */
		if (subreq->len > rreq->i_size - subreq->start)
			subreq->len = rreq->i_size - subreq->start;

		if (rreq->netfs_ops->clamp_length &&
		    !rreq->netfs_ops->clamp_length(subreq)) {
			source = NETFS_INVALID_READ;
			goto out;
		}
	}

	if (WARN_ON(subreq->len == 0))
		source = NETFS_INVALID_READ;

out:
	subreq->source = source;
	trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
	return source;
}

/*
 * Slice off a piece of a read request and submit an I/O request for it.
 */
static bool netfs_rreq_submit_slice(struct netfs_read_request *rreq,
				    unsigned int *_debug_index)
{
	struct netfs_read_subrequest *subreq;
	enum netfs_read_source source;

	subreq = netfs_alloc_subrequest(rreq);
	if (!subreq)
		return false;

	subreq->debug_index	= (*_debug_index)++;
	subreq->start		= rreq->start + rreq->submitted;
	subreq->len		= rreq->len   - rreq->submitted;

	_debug("slice %llx,%zx,%zx", subreq->start, subreq->len, rreq->submitted);
	list_add_tail(&subreq->rreq_link, &rreq->subrequests);

	/* Call out to the cache to find out what it can do with the remaining
	 * subset.  It tells us in subreq->flags what it decided should be done
	 * and adjusts subreq->len down if the subset crosses a cache boundary.
	 *
	 * Then when we hand off the subset, it can choose to take a subset of that
	 * (the starts must coincide), in which case, we go around the loop
	 * again and ask it to download the next piece.
	 */
	source = netfs_rreq_prepare_read(rreq, subreq);
	if (source == NETFS_INVALID_READ)
		goto subreq_failed;

	atomic_inc(&rreq->nr_rd_ops);

	rreq->submitted += subreq->len;

	trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
	switch (source) {
	case NETFS_FILL_WITH_ZEROES:
		netfs_fill_with_zeroes(rreq, subreq);
		break;
	case NETFS_DOWNLOAD_FROM_SERVER:
		netfs_read_from_server(rreq, subreq);
		break;
	case NETFS_READ_FROM_CACHE:
		netfs_read_from_cache(rreq, subreq, false);
		break;
	default:
		BUG();
	}

	return true;

subreq_failed:
	rreq->error = subreq->error;
	netfs_put_subrequest(subreq, false);
	return false;
}

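/*
 * Give the cache a chance to expand the proposed readahead window.
 */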
static void netfs_cache_expand_readahead(struct netfs_read_request *rreq,
					 loff_t *_start, size_t *_len, loff_t i_size)
{
	struct netfs_cache_resources *cres = &rreq->cache_resources;

	if (cres->ops && cres->ops->expand_readahead)
		cres->ops->expand_readahead(cres, _start, _len, i_size);
}

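/*
 * Expand the request to meet the preferences of the cache and the netfs.  The
 * resultant window must contain the original region.
 */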
static void netfs_rreq_expand(struct netfs_read_request *rreq,
			      struct readahead_control *ractl)
{
	/* Give the cache a chance to change the request parameters.  The
	 * resultant request must contain the original region.
	 */
	netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size);

	/* Give the netfs a chance to change the request parameters.  The
	 * resultant request must contain the original region.
	 */
	if (rreq->netfs_ops->expand_readahead)
		rreq->netfs_ops->expand_readahead(rreq);

	/* Expand the request if the cache wants it to start earlier.  Note
	 * that the expansion may get further extended if the VM wishes to
	 * insert THPs and the preferred start and/or end wind up in the middle
	 * of THPs.
	 *
	 * If this is the case, however, the THP size should be an integer
	 * multiple of the cache granule size, so we get a whole number of
	 * granules to deal with.
	 */
	if (rreq->start  != readahead_pos(ractl) ||
	    rreq->len != readahead_length(ractl)) {
		readahead_expand(ractl, rreq->start, rreq->len);
		rreq->start  = readahead_pos(ractl);
		rreq->len = readahead_length(ractl);

		trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
				 netfs_read_trace_expanded);
	}
}

/**
 * netfs_readahead - Helper to manage a read request
 * @ractl: The description of the readahead request
 * @ops: The network filesystem's operations for the helper to use
 * @netfs_priv: Private netfs data to be retained in the request
 *
 * Fulfil a readahead request by drawing data from the cache if possible, or
 * the netfs if not.  Space beyond the EOF is zero-filled.  Multiple I/O
 * requests from different sources will get munged together.  If necessary, the
 * readahead window can be expanded in either direction to a more convenient
 * alignment for RPC efficiency or to make storage in the cache feasible.
 *
 * The calling netfs must provide a table of operations, only one of which,
 * issue_op, is mandatory.  It may also be passed a private token, which will
 * be retained in rreq->netfs_priv and will be cleaned up by ops->cleanup().
 *
 * This is usable whether or not caching is enabled.
 */
void netfs_readahead(struct readahead_control *ractl,
		     const struct netfs_read_request_ops *ops,
		     void *netfs_priv)
{
	struct netfs_read_request *rreq;
	unsigned int debug_index = 0;
	int ret;

	_enter("%lx,%x", readahead_index(ractl), readahead_count(ractl));

	if (readahead_count(ractl) == 0)
		goto cleanup;

	rreq = netfs_alloc_read_request(ops, netfs_priv, ractl->file);
	if (!rreq)
		goto cleanup;
	rreq->mapping	= ractl->mapping;
	rreq->start	= readahead_pos(ractl);
	rreq->len	= readahead_length(ractl);

	if (ops->begin_cache_operation) {
		ret = ops->begin_cache_operation(rreq);
		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
			goto cleanup_free;
	}

	netfs_stat(&netfs_n_rh_readahead);
	trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
			 netfs_read_trace_readahead);

	netfs_rreq_expand(rreq, ractl);

	atomic_set(&rreq->nr_rd_ops, 1);
	do {
		if (!netfs_rreq_submit_slice(rreq, &debug_index))
			break;

	} while (rreq->submitted < rreq->len);

	/* Drop the refs on the folios here rather than in the cache or
	 * filesystem.  The locks will be dropped in netfs_rreq_unlock().
	 */
	while (readahead_folio(ractl))
		;

	/* If we decrement nr_rd_ops to 0, the ref belongs to us. */
	if (atomic_dec_and_test(&rreq->nr_rd_ops))
		netfs_rreq_assess(rreq, false);
	return;

cleanup_free:
	netfs_put_read_request(rreq, false);
	return;
cleanup:
	if (netfs_priv)
		ops->cleanup(ractl->mapping, netfs_priv);
	return;
}
EXPORT_SYMBOL(netfs_readahead);
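
/*
 * A minimal sketch of how a network filesystem might wire this up as its
 * ->readahead() address_space op.  The "myfs" names and the choice of ops are
 * hypothetical, not part of this API:
 *
 *	static const struct netfs_read_request_ops myfs_req_ops = {
 *		.init_rreq	= myfs_init_rreq,
 *		.issue_op	= myfs_issue_op,
 *		.cleanup	= myfs_cleanup,
 *	};
 *
 *	static void myfs_readahead(struct readahead_control *ractl)
 *	{
 *		netfs_readahead(ractl, &myfs_req_ops, NULL);
 *	}
 */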

/**
 * netfs_readpage - Helper to manage a readpage request
 * @file: The file to read from
 * @folio: The folio to read
 * @ops: The network filesystem's operations for the helper to use
 * @netfs_priv: Private netfs data to be retained in the request
 *
 * Fulfil a readpage request by drawing data from the cache if possible, or the
 * netfs if not.  Space beyond the EOF is zero-filled.  Multiple I/O requests
 * from different sources will get munged together.
 *
 * The calling netfs must provide a table of operations, only one of which,
 * issue_op, is mandatory.  It may also be passed a private token, which will
 * be retained in rreq->netfs_priv and will be cleaned up by ops->cleanup().
 *
 * This is usable whether or not caching is enabled.
 */
int netfs_readpage(struct file *file,
		   struct folio *folio,
		   const struct netfs_read_request_ops *ops,
		   void *netfs_priv)
{
	struct netfs_read_request *rreq;
	unsigned int debug_index = 0;
	int ret;

	_enter("%lx", folio_index(folio));

	rreq = netfs_alloc_read_request(ops, netfs_priv, file);
	if (!rreq) {
		if (netfs_priv)
			ops->cleanup(folio_file_mapping(folio), netfs_priv);
		folio_unlock(folio);
		return -ENOMEM;
	}
	rreq->mapping	= folio_file_mapping(folio);
	rreq->start	= folio_file_pos(folio);
	rreq->len	= folio_size(folio);

	if (ops->begin_cache_operation) {
		ret = ops->begin_cache_operation(rreq);
		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) {
			folio_unlock(folio);
			goto out;
		}
	}

	netfs_stat(&netfs_n_rh_readpage);
	trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);

	netfs_get_read_request(rreq);

	atomic_set(&rreq->nr_rd_ops, 1);
	do {
		if (!netfs_rreq_submit_slice(rreq, &debug_index))
			break;

	} while (rreq->submitted < rreq->len);

	/* Keep nr_rd_ops incremented so that the ref always belongs to us, and
	 * the service code isn't punted off to a random thread pool to
	 * process.
	 */
	do {
		wait_var_event(&rreq->nr_rd_ops, atomic_read(&rreq->nr_rd_ops) == 1);
		netfs_rreq_assess(rreq, false);
	} while (test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags));

	ret = rreq->error;
	if (ret == 0 && rreq->submitted < rreq->len) {
		trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_readpage);
		ret = -EIO;
	}
out:
	netfs_put_read_request(rreq, false);
	return ret;
}
EXPORT_SYMBOL(netfs_readpage);
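
/*
 * A sketch of a ->readpage() wrapper around this helper (hypothetical "myfs",
 * with myfs_req_ops as sketched above).  The page handed over by the VM is
 * converted to a folio for the helper:
 *
 *	static int myfs_readpage(struct file *file, struct page *page)
 *	{
 *		return netfs_readpage(file, page_folio(page), &myfs_req_ops, NULL);
 *	}
 */
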
/**
 * netfs_skip_folio_read - prep a folio for writing without reading first
 * @folio: The folio being prepared
 * @pos: starting position for the write
 * @len: length of write
 *
 * In some cases, write_begin doesn't need to read at all:
 * - full folio write
 * - write that lies in a folio that is completely beyond EOF
 * - write that covers the folio from start to EOF or beyond it
 *
 * If any of these criteria are met, then zero out the unwritten parts
 * of the folio and return true. Otherwise, return false.
 */
static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len)
{
	struct inode *inode = folio_inode(folio);
	loff_t i_size = i_size_read(inode);
	size_t offset = offset_in_folio(folio, pos);

	/* Full folio write */
	if (offset == 0 && len >= folio_size(folio))
		return true;

	/* pos beyond last folio in the file */
	if (pos - offset >= i_size)
		goto zero_out;

	/* Write that covers from the start of the folio to EOF or beyond */
	if (offset == 0 && (pos + len) >= i_size)
		goto zero_out;

	return false;
zero_out:
	zero_user_segments(&folio->page, 0, offset, offset + len, folio_size(folio));
	return true;
}

/**
 * netfs_write_begin - Helper to prepare for writing
 * @file: The file to read from
 * @mapping: The mapping to read from
 * @pos: File position at which the write will begin
 * @len: The length of the write (may extend beyond the end of the folio chosen)
 * @aop_flags: AOP_* flags
 * @_folio: Where to put the resultant folio
 * @_fsdata: Place for the netfs to store a cookie
 * @ops: The network filesystem's operations for the helper to use
 * @netfs_priv: Private netfs data to be retained in the request
 *
 * Pre-read data for a write-begin request by drawing data from the cache if
 * possible, or the netfs if not.  Space beyond the EOF is zero-filled.
 * Multiple I/O requests from different sources will get munged together.  If
 * necessary, the readahead window can be expanded in either direction to a
 * more convenient alignment for RPC efficiency or to make storage in the cache
 * feasible.
 *
 * The calling netfs must provide a table of operations, only one of which,
 * issue_op, is mandatory.
 *
 * The check_write_begin() operation can be provided to check for and flush
 * conflicting writes once the folio is grabbed and locked.  It is passed a
 * pointer to the fsdata cookie that gets returned to the VM to be passed to
 * write_end.  It is permitted to sleep.  It should return 0 if the request
 * should go ahead; unlock the folio and return -EAGAIN to cause the folio to
 * be looked up again; or return an error.
 *
 * This is usable whether or not caching is enabled.
 */
int netfs_write_begin(struct file *file, struct address_space *mapping,
		      loff_t pos, unsigned int len, unsigned int aop_flags,
		      struct folio **_folio, void **_fsdata,
		      const struct netfs_read_request_ops *ops,
		      void *netfs_priv)
{
	struct netfs_read_request *rreq;
	struct folio *folio;
	struct inode *inode = file_inode(file);
	unsigned int debug_index = 0, fgp_flags;
	pgoff_t index = pos >> PAGE_SHIFT;
	int ret;

	DEFINE_READAHEAD(ractl, file, NULL, mapping, index);

retry:
	fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;
	if (aop_flags & AOP_FLAG_NOFS)
		fgp_flags |= FGP_NOFS;
	folio = __filemap_get_folio(mapping, index, fgp_flags,
				    mapping_gfp_mask(mapping));
	if (!folio)
		return -ENOMEM;

	if (ops->check_write_begin) {
		/* Allow the netfs (eg. ceph) to flush conflicts. */
		ret = ops->check_write_begin(file, pos, len, folio, _fsdata);
		if (ret < 0) {
			trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin);
			if (ret == -EAGAIN)
				goto retry;
			goto error;
		}
	}

	if (folio_test_uptodate(folio))
		goto have_folio;

	/* If the folio is beyond the EOF, we want to clear it - unless it's
	 * within the cache granule containing the EOF, in which case we need
	 * to preload the granule.
	 */
	if (!ops->is_cache_enabled(inode) &&
	    netfs_skip_folio_read(folio, pos, len)) {
		netfs_stat(&netfs_n_rh_write_zskip);
		goto have_folio_no_wait;
	}

	ret = -ENOMEM;
	rreq = netfs_alloc_read_request(ops, netfs_priv, file);
	if (!rreq)
		goto error;
	rreq->mapping		= folio_file_mapping(folio);
	rreq->start		= folio_file_pos(folio);
	rreq->len		= folio_size(folio);
	rreq->no_unlock_folio	= folio_index(folio);
	__set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags);
	netfs_priv = NULL;

	if (ops->begin_cache_operation) {
		ret = ops->begin_cache_operation(rreq);
		if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
			goto error_put;
	}

	netfs_stat(&netfs_n_rh_write_begin);
	trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin);

	/* Expand the request to meet caching requirements and download
	 * preferences.
	 */
	ractl._nr_pages = folio_nr_pages(folio);
	netfs_rreq_expand(rreq, &ractl);
	netfs_get_read_request(rreq);

	/* We hold the folio locks, so we can drop the references */
	folio_get(folio);
	while (readahead_folio(&ractl))
		;

	atomic_set(&rreq->nr_rd_ops, 1);
	do {
		if (!netfs_rreq_submit_slice(rreq, &debug_index))
			break;

	} while (rreq->submitted < rreq->len);

	/* Keep nr_rd_ops incremented so that the ref always belongs to us, and
	 * the service code isn't punted off to a random thread pool to
	 * process.
	 */
	for (;;) {
		wait_var_event(&rreq->nr_rd_ops, atomic_read(&rreq->nr_rd_ops) == 1);
		netfs_rreq_assess(rreq, false);
		if (!test_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags))
			break;
		cond_resched();
	}

	ret = rreq->error;
	if (ret == 0 && rreq->submitted < rreq->len) {
		trace_netfs_failure(rreq, NULL, ret, netfs_fail_short_write_begin);
		ret = -EIO;
	}
	netfs_put_read_request(rreq, false);
	if (ret < 0)
		goto error;

have_folio:
	ret = folio_wait_fscache_killable(folio);
	if (ret < 0)
		goto error;
have_folio_no_wait:
	if (netfs_priv)
		ops->cleanup(mapping, netfs_priv);
	*_folio = folio;
	_leave(" = 0");
	return 0;

error_put:
	netfs_put_read_request(rreq, false);
error:
	folio_unlock(folio);
	folio_put(folio);
	if (netfs_priv)
		ops->cleanup(mapping, netfs_priv);
	_leave(" = %d", ret);
	return ret;
}
EXPORT_SYMBOL(netfs_write_begin);
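
/*
 * A sketch of a ->write_begin() implementation built on this helper
 * (hypothetical "myfs", with myfs_req_ops as sketched above).  The folio the
 * helper prepares supplies the page that the VM expects back:
 *
 *	static int myfs_write_begin(struct file *file, struct address_space *mapping,
 *				    loff_t pos, unsigned len, unsigned flags,
 *				    struct page **pagep, void **fsdata)
 *	{
 *		struct folio *folio;
 *		int ret = netfs_write_begin(file, mapping, pos, len, flags,
 *					    &folio, fsdata, &myfs_req_ops, NULL);
 *
 *		if (ret == 0)
 *			*pagep = &folio->page;
 *		return ret;
 *	}
 */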