// SPDX-License-Identifier: GPL-2.0
/*
 * fs/f2fs/data.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/prefetch.h>
#include <linux/uio.h>
#include <linux/cleancache.h>
#include <linux/sched/signal.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "trace.h"
#include <trace/events/f2fs.h>

#define NUM_PREALLOC_POST_READ_CTXS	128

static struct kmem_cache *bio_post_read_ctx_cache;
static mempool_t *bio_post_read_ctx_pool;

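/*
 * Return true if writeback of this page must complete before a checkpoint
 * can commit: meta/node pages, directory data, atomic-file and quota data,
 * and cold data pages.
 */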
static bool __is_cp_guaranteed(struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct inode *inode;
	struct f2fs_sb_info *sbi;

	if (!mapping)
		return false;

	inode = mapping->host;
	sbi = F2FS_I_SB(inode);

	if (inode->i_ino == F2FS_META_INO(sbi) ||
			inode->i_ino ==  F2FS_NODE_INO(sbi) ||
			S_ISDIR(inode->i_mode) ||
			(S_ISREG(inode->i_mode) &&
			(f2fs_is_atomic_file(inode) || IS_NOQUOTA(inode))) ||
			is_cold_data(page))
		return true;
	return false;
}

static enum count_type __read_io_type(struct page *page)
{
	struct address_space *mapping = page->mapping;

	if (mapping) {
		struct inode *inode = mapping->host;
		struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

		if (inode->i_ino == F2FS_META_INO(sbi))
			return F2FS_RD_META;

		if (inode->i_ino == F2FS_NODE_INO(sbi))
			return F2FS_RD_NODE;
	}
	return F2FS_RD_DATA;
}

/* postprocessing steps for read bios */
enum bio_post_read_step {
	STEP_INITIAL = 0,
	STEP_DECRYPT,
};

struct bio_post_read_ctx {
	struct bio *bio;
	struct work_struct work;
	unsigned int cur_step;
	unsigned int enabled_steps;
};

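/* Final step of a read bio: mark each page uptodate or errored, then unlock it. */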
static void __read_end_io(struct bio *bio)
86
{
87 88
	struct page *page;
	struct bio_vec *bv;
89
	int i;
90
	struct bvec_iter_all iter_all;
91

92
	bio_for_each_segment_all(bv, bio, i, iter_all) {
93 94 95 96 97
		page = bv->bv_page;

		/* PG_error was set if any post_read step failed */
		if (bio->bi_status || PageError(page)) {
			ClearPageUptodate(page);
98 99
			/* will re-read again later */
			ClearPageError(page);
100 101 102
		} else {
			SetPageUptodate(page);
		}
103
		dec_page_count(F2FS_P_SB(page), __read_io_type(page));
104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
		unlock_page(page);
	}
	if (bio->bi_private)
		mempool_free(bio->bi_private, bio_post_read_ctx_pool);
	bio_put(bio);
}

static void bio_post_read_processing(struct bio_post_read_ctx *ctx);

static void decrypt_work(struct work_struct *work)
{
	struct bio_post_read_ctx *ctx =
		container_of(work, struct bio_post_read_ctx, work);

	fscrypt_decrypt_bio(ctx->bio);

	bio_post_read_processing(ctx);
}

static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
{
	switch (++ctx->cur_step) {
	case STEP_DECRYPT:
		if (ctx->enabled_steps & (1 << STEP_DECRYPT)) {
			INIT_WORK(&ctx->work, decrypt_work);
			fscrypt_enqueue_decrypt_work(&ctx->work);
			return;
		}
		ctx->cur_step++;
		/* fall-through */
	default:
		__read_end_io(ctx->bio);
	}
}

static bool f2fs_bio_post_read_required(struct bio *bio)
{
	return bio->bi_private && !bio->bi_status;
}

static void f2fs_read_end_io(struct bio *bio)
{
	if (time_to_inject(F2FS_P_SB(bio_first_page_all(bio)),
						FAULT_READ_IO)) {
		f2fs_show_injection_info(FAULT_READ_IO);
		bio->bi_status = BLK_STS_IOERR;
	}

	if (f2fs_bio_post_read_required(bio)) {
		struct bio_post_read_ctx *ctx = bio->bi_private;

		ctx->cur_step = STEP_INITIAL;
		bio_post_read_processing(ctx);
		return;
	}

	__read_end_io(bio);
}

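/* Write-bio completion: release dummy pages, end page writeback, wake CP waiters. */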
static void f2fs_write_end_io(struct bio *bio)
{
	struct f2fs_sb_info *sbi = bio->bi_private;
	struct bio_vec *bvec;
	int i;
	struct bvec_iter_all iter_all;

	if (time_to_inject(sbi, FAULT_WRITE_IO)) {
		f2fs_show_injection_info(FAULT_WRITE_IO);
		bio->bi_status = BLK_STS_IOERR;
	}

	bio_for_each_segment_all(bvec, bio, i, iter_all) {
		struct page *page = bvec->bv_page;
		enum count_type type = WB_DATA_TYPE(page);

		if (IS_DUMMY_WRITTEN_PAGE(page)) {
			set_page_private(page, (unsigned long)NULL);
			ClearPagePrivate(page);
			unlock_page(page);
			mempool_free(page, sbi->write_io_dummy);

			if (unlikely(bio->bi_status))
				f2fs_stop_checkpoint(sbi, true);
			continue;
		}

		fscrypt_pullback_bio_page(&page, true);

		if (unlikely(bio->bi_status)) {
			mapping_set_error(page->mapping, -EIO);
			if (type == F2FS_WB_CP_DATA)
				f2fs_stop_checkpoint(sbi, true);
		}

		f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) &&
					page->index != nid_of_node(page));

		dec_page_count(sbi, type);
		if (f2fs_in_warm_node_list(sbi, page))
			f2fs_del_fsync_node_entry(sbi, page);
		clear_cold_data(page);
		end_page_writeback(page);
	}
	if (!get_pages(sbi, F2FS_WB_CP_DATA) &&
				wq_has_sleeper(&sbi->cp_wait))
		wake_up(&sbi->cp_wait);

	bio_put(bio);
}

/*
 * Return the block device that blk_addr belongs to; if a bio is given,
 * also redirect the bio to that device.
 */
struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
				block_t blk_addr, struct bio *bio)
{
	struct block_device *bdev = sbi->sb->s_bdev;
	int i;

	for (i = 0; i < sbi->s_ndevs; i++) {
		if (FDEV(i).start_blk <= blk_addr &&
					FDEV(i).end_blk >= blk_addr) {
			blk_addr -= FDEV(i).start_blk;
			bdev = FDEV(i).bdev;
			break;
		}
	}
	if (bio) {
		bio_set_dev(bio, bdev);
		bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
	}
	return bdev;
}

int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	int i;

	for (i = 0; i < sbi->s_ndevs; i++)
		if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
			return i;
	return 0;
}

static bool __same_bdev(struct f2fs_sb_info *sbi,
				block_t blk_addr, struct bio *bio)
{
	struct block_device *b = f2fs_target_device(sbi, blk_addr, NULL);
	return bio->bi_disk == b->bd_disk && bio->bi_partno == b->bd_partno;
}

/*
 * Low-level block read/write IO operations.
 */
static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
				struct writeback_control *wbc,
				int npages, bool is_read,
				enum page_type type, enum temp_type temp)
{
	struct bio *bio;

	bio = f2fs_bio_alloc(sbi, npages, true);

	f2fs_target_device(sbi, blk_addr, bio);
	if (is_read) {
		bio->bi_end_io = f2fs_read_end_io;
		bio->bi_private = NULL;
	} else {
		bio->bi_end_io = f2fs_write_end_io;
		bio->bi_private = sbi;
		bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, type, temp);
	}
	if (wbc)
		wbc_init_bio(wbc, bio);

	return bio;
}

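/*
 * Submit a bio; for LFS-style writes of DATA/NODE pages, first pad the bio
 * out to the configured IO size with zeroed dummy pages.
 */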
static inline void __submit_bio(struct f2fs_sb_info *sbi,
				struct bio *bio, enum page_type type)
{
	if (!is_read_io(bio_op(bio))) {
		unsigned int start;

		if (type != DATA && type != NODE)
			goto submit_io;

		if (test_opt(sbi, LFS) && current->plug)
			blk_finish_plug(current->plug);

		start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
		start %= F2FS_IO_SIZE(sbi);

		if (start == 0)
			goto submit_io;

		/* fill dummy pages */
		for (; start < F2FS_IO_SIZE(sbi); start++) {
			struct page *page =
				mempool_alloc(sbi->write_io_dummy,
					GFP_NOIO | __GFP_ZERO | __GFP_NOFAIL);
			f2fs_bug_on(sbi, !page);

			SetPagePrivate(page);
			set_page_private(page, (unsigned long)DUMMY_WRITTEN_PAGE);
			lock_page(page);
			if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
				f2fs_bug_on(sbi, 1);
		}
		/*
		 * In the NODE case, we lose next block address chain. So, we
		 * need to do checkpoint in f2fs_sync_file.
		 */
		if (type == NODE)
			set_sbi_flag(sbi, SBI_NEED_CP);
	}
submit_io:
	if (is_read_io(bio_op(bio)))
		trace_f2fs_submit_read_bio(sbi->sb, type, bio);
	else
		trace_f2fs_submit_write_bio(sbi->sb, type, bio);
	submit_bio(bio);
}

static void __submit_merged_bio(struct f2fs_bio_info *io)
{
	struct f2fs_io_info *fio = &io->fio;

	if (!io->bio)
		return;

	bio_set_op_attrs(io->bio, fio->op, fio->op_flags);

	if (is_read_io(fio->op))
		trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
	else
		trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio);

	__submit_bio(io->sbi, io->bio, fio->type);
	io->bio = NULL;
}

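/* Return true if the pending merged bio already carries a page of inode, page or ino. */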
static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
						struct page *page, nid_t ino)
{
	struct bio_vec *bvec;
	struct page *target;
	int i;
	struct bvec_iter_all iter_all;

	if (!io->bio)
		return false;

	if (!inode && !page && !ino)
		return true;

	bio_for_each_segment_all(bvec, io->bio, i, iter_all) {

		if (bvec->bv_page->mapping)
			target = bvec->bv_page;
		else
			target = fscrypt_control_page(bvec->bv_page);

		if (inode && inode == target->mapping->host)
			return true;
		if (page && page == target)
			return true;
		if (ino && ino == ino_of_node(target))
			return true;
	}

	return false;
}

static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
				enum page_type type, enum temp_type temp)
{
	enum page_type btype = PAGE_TYPE_OF_BIO(type);
	struct f2fs_bio_info *io = sbi->write_io[btype] + temp;

	down_write(&io->io_rwsem);

	/* change META to META_FLUSH in the checkpoint procedure */
	if (type >= META_FLUSH) {
		io->fio.type = META_FLUSH;
		io->fio.op = REQ_OP_WRITE;
		io->fio.op_flags = REQ_META | REQ_PRIO | REQ_SYNC;
		if (!test_opt(sbi, NOBARRIER))
			io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
	}
	__submit_merged_bio(io);
	up_write(&io->io_rwsem);
}

static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
				struct inode *inode, struct page *page,
				nid_t ino, enum page_type type, bool force)
{
	enum temp_type temp;
	bool ret = true;

	for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
		if (!force)	{
			enum page_type btype = PAGE_TYPE_OF_BIO(type);
			struct f2fs_bio_info *io = sbi->write_io[btype] + temp;

			down_read(&io->io_rwsem);
			ret = __has_merged_page(io, inode, page, ino);
			up_read(&io->io_rwsem);
		}
		if (ret)
			__f2fs_submit_merged_write(sbi, type, temp);

		/* TODO: use HOT temp only for meta pages now. */
		if (type >= META)
			break;
	}
}

void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
{
	__submit_merged_write_cond(sbi, NULL, 0, 0, type, true);
}

void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
				struct inode *inode, struct page *page,
				nid_t ino, enum page_type type)
{
	__submit_merged_write_cond(sbi, inode, page, ino, type, false);
}

void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
{
	f2fs_submit_merged_write(sbi, DATA);
	f2fs_submit_merged_write(sbi, NODE);
	f2fs_submit_merged_write(sbi, META);
}

/*
 * Fill the locked page with data located in the block address.
 * A caller needs to unlock the page on failure.
 */
int f2fs_submit_page_bio(struct f2fs_io_info *fio)
{
	struct bio *bio;
	struct page *page = fio->encrypted_page ?
			fio->encrypted_page : fio->page;

	if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
			__is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
		return -EFAULT;

	trace_f2fs_submit_page_bio(page, fio);
	f2fs_trace_ios(fio, 0);

	/* Allocate a new bio */
	bio = __bio_alloc(fio->sbi, fio->new_blkaddr, fio->io_wbc,
				1, is_read_io(fio->op), fio->type, fio->temp);

	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
		bio_put(bio);
		return -EFAULT;
	}

	if (fio->io_wbc && !is_read_io(fio->op))
		wbc_account_io(fio->io_wbc, page, PAGE_SIZE);

	bio_set_op_attrs(bio, fio->op, fio->op_flags);

	inc_page_count(fio->sbi, is_read_io(fio->op) ?
			__read_io_type(page): WB_DATA_TYPE(fio->page));

	__submit_bio(fio->sbi, bio, fio->type);
	return 0;
}

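/*
 * Append the page to the per-(type, temp) write bio, merging with the
 * previous block when addresses and op flags allow; otherwise submit the
 * pending bio and start a new one.
 */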
void f2fs_submit_page_write(struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
	struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
	struct page *bio_page;

	f2fs_bug_on(sbi, is_read_io(fio->op));

	down_write(&io->io_rwsem);
next:
	if (fio->in_list) {
		spin_lock(&io->io_lock);
		if (list_empty(&io->io_list)) {
			spin_unlock(&io->io_lock);
			goto out;
		}
		fio = list_first_entry(&io->io_list,
						struct f2fs_io_info, list);
		list_del(&fio->list);
		spin_unlock(&io->io_lock);
	}

	if (__is_valid_data_blkaddr(fio->old_blkaddr))
		verify_block_addr(fio, fio->old_blkaddr);
	verify_block_addr(fio, fio->new_blkaddr);

	bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;

	/* set submitted = true as a return value */
	fio->submitted = true;

	inc_page_count(sbi, WB_DATA_TYPE(bio_page));

	if (io->bio && (io->last_block_in_bio != fio->new_blkaddr - 1 ||
	    (io->fio.op != fio->op || io->fio.op_flags != fio->op_flags) ||
			!__same_bdev(sbi, fio->new_blkaddr, io->bio)))
		__submit_merged_bio(io);
alloc_new:
	if (io->bio == NULL) {
		if ((fio->type == DATA || fio->type == NODE) &&
				fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
			dec_page_count(sbi, WB_DATA_TYPE(bio_page));
			fio->retry = true;
			goto skip;
		}
		io->bio = __bio_alloc(sbi, fio->new_blkaddr, fio->io_wbc,
						BIO_MAX_PAGES, false,
						fio->type, fio->temp);
		io->fio = *fio;
	}

	if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) {
		__submit_merged_bio(io);
		goto alloc_new;
	}

	if (fio->io_wbc)
		wbc_account_io(fio->io_wbc, bio_page, PAGE_SIZE);

	io->last_block_in_bio = fio->new_blkaddr;
	f2fs_trace_ios(fio, 0);

	trace_f2fs_submit_page_write(fio->page, fio);
skip:
	if (fio->in_list)
		goto next;
out:
	if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
				f2fs_is_checkpoint_ready(sbi))
		__submit_merged_bio(io);
	up_write(&io->io_rwsem);
}

static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
					unsigned nr_pages, unsigned op_flag)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct bio *bio;
	struct bio_post_read_ctx *ctx;
	unsigned int post_read_steps = 0;

	if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC))
		return ERR_PTR(-EFAULT);

	bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false);
	if (!bio)
		return ERR_PTR(-ENOMEM);
	f2fs_target_device(sbi, blkaddr, bio);
	bio->bi_end_io = f2fs_read_end_io;
	bio_set_op_attrs(bio, REQ_OP_READ, op_flag);

	if (f2fs_encrypted_file(inode))
		post_read_steps |= 1 << STEP_DECRYPT;
	if (post_read_steps) {
		ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
		if (!ctx) {
			bio_put(bio);
			return ERR_PTR(-ENOMEM);
		}
		ctx->bio = bio;
		ctx->enabled_steps = post_read_steps;
		bio->bi_private = ctx;
	}

	return bio;
}

/* This can handle encryption stuff */
static int f2fs_submit_page_read(struct inode *inode, struct page *page,
							block_t blkaddr)
{
	struct bio *bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0);

	if (IS_ERR(bio))
		return PTR_ERR(bio);

	/* wait for GCed page writeback via META_MAPPING */
	f2fs_wait_on_block_writeback(inode, blkaddr);

	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
		bio_put(bio);
		return -EFAULT;
	}
	ClearPageError(page);
	inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
	__submit_bio(F2FS_I_SB(inode), bio, DATA);
	return 0;
}

static void __set_data_blkaddr(struct dnode_of_data *dn)
{
	struct f2fs_node *rn = F2FS_NODE(dn->node_page);
	__le32 *addr_array;
	int base = 0;

	if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode))
		base = get_extra_isize(dn->inode);

	/* Get physical address of data block */
	addr_array = blkaddr_in_node(rn);
	addr_array[base + dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
}

/*
 * Lock ordering for the change of data block address:
 * ->data_page
 *  ->node_page
 *    update block addresses in the node page
 */
void f2fs_set_data_blkaddr(struct dnode_of_data *dn)
{
	f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
	__set_data_blkaddr(dn);
	if (set_page_dirty(dn->node_page))
		dn->node_changed = true;
}

void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
{
	dn->data_blkaddr = blkaddr;
	f2fs_set_data_blkaddr(dn);
	f2fs_update_extent_cache(dn);
}

/* dn->ofs_in_node will be returned with up-to-date last block pointer */
int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	int err;

	if (!count)
		return 0;

	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
		return -EPERM;
	if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
		return err;

	trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
						dn->ofs_in_node, count);

	f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);

	for (; count > 0; dn->ofs_in_node++) {
		block_t blkaddr = datablock_addr(dn->inode,
					dn->node_page, dn->ofs_in_node);
		if (blkaddr == NULL_ADDR) {
			dn->data_blkaddr = NEW_ADDR;
			__set_data_blkaddr(dn);
			count--;
		}
	}

	if (set_page_dirty(dn->node_page))
		dn->node_changed = true;
	return 0;
}

/* Should keep dn->ofs_in_node unchanged */
int f2fs_reserve_new_block(struct dnode_of_data *dn)
{
	unsigned int ofs_in_node = dn->ofs_in_node;
	int ret;

	ret = f2fs_reserve_new_blocks(dn, 1);
	dn->ofs_in_node = ofs_in_node;
	return ret;
}

int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
{
	bool need_put = dn->inode_page ? false : true;
	int err;

	err = f2fs_get_dnode_of_data(dn, index, ALLOC_NODE);
	if (err)
		return err;

	if (dn->data_blkaddr == NULL_ADDR)
		err = f2fs_reserve_new_block(dn);
	if (err || need_put)
		f2fs_put_dnode(dn);
	return err;
}

int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
{
	struct extent_info ei = {0,0,0};
	struct inode *inode = dn->inode;

	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
		dn->data_blkaddr = ei.blk + index - ei.fofs;
		return 0;
	}

	return f2fs_reserve_block(dn, index);
}

struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
						int op_flags, bool for_write)
{
	struct address_space *mapping = inode->i_mapping;
	struct dnode_of_data dn;
	struct page *page;
	struct extent_info ei = {0,0,0};
	int err;

	page = f2fs_grab_cache_page(mapping, index, for_write);
	if (!page)
		return ERR_PTR(-ENOMEM);

	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
		dn.data_blkaddr = ei.blk + index - ei.fofs;
		goto got_it;
	}

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
	if (err)
		goto put_err;
	f2fs_put_dnode(&dn);

	if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
		err = -ENOENT;
		goto put_err;
	}
got_it:
	if (PageUptodate(page)) {
		unlock_page(page);
		return page;
	}

	/*
	 * A new dentry page is allocated but not able to be written, since its
	 * new inode page couldn't be allocated due to -ENOSPC.
	 * In such cases, its blkaddr remains NEW_ADDR.
	 * see, f2fs_add_link -> f2fs_get_new_data_page ->
	 * f2fs_init_inode_metadata.
	 */
	if (dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_SIZE);
		if (!PageUptodate(page))
			SetPageUptodate(page);
		unlock_page(page);
		return page;
	}

	err = f2fs_submit_page_read(inode, page, dn.data_blkaddr);
	if (err)
		goto put_err;
	return page;

put_err:
	f2fs_put_page(page, 1);
	return ERR_PTR(err);
}

struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;

	page = find_get_page(mapping, index);
	if (page && PageUptodate(page))
		return page;
	f2fs_put_page(page, 0);

	page = f2fs_get_read_data_page(inode, index, 0, false);
	if (IS_ERR(page))
		return page;

	if (PageUptodate(page))
		return page;

	wait_on_page_locked(page);
	if (unlikely(!PageUptodate(page))) {
		f2fs_put_page(page, 0);
		return ERR_PTR(-EIO);
	}
	return page;
}

/*
 * If it tries to access a hole, return an error.
 * Because, the callers, functions in dir.c and GC, should be able to know
 * whether this page exists or not.
 */
struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
							bool for_write)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
repeat:
	page = f2fs_get_read_data_page(inode, index, 0, for_write);
	if (IS_ERR(page))
		return page;

	/* wait for read completion */
	lock_page(page);
	if (unlikely(page->mapping != mapping)) {
		f2fs_put_page(page, 1);
		goto repeat;
	}
	if (unlikely(!PageUptodate(page))) {
		f2fs_put_page(page, 1);
		return ERR_PTR(-EIO);
	}
	return page;
}

/*
 * Caller ensures that this data page is never allocated.
 * A new zero-filled data page is allocated in the page cache.
 *
 * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
 * f2fs_unlock_op().
 * Note that, ipage is set only by make_empty_dir, and if any error occurs,
 * ipage should be released by this function.
 */
struct page *f2fs_get_new_data_page(struct inode *inode,
		struct page *ipage, pgoff_t index, bool new_i_size)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
	struct dnode_of_data dn;
	int err;

	page = f2fs_grab_cache_page(mapping, index, true);
	if (!page) {
		/*
		 * before exiting, we should make sure ipage will be released
		 * if any error occurs.
		 */
		f2fs_put_page(ipage, 1);
		return ERR_PTR(-ENOMEM);
	}

	set_new_dnode(&dn, inode, ipage, NULL, 0);
	err = f2fs_reserve_block(&dn, index);
	if (err) {
		f2fs_put_page(page, 1);
		return ERR_PTR(err);
	}
	if (!ipage)
		f2fs_put_dnode(&dn);

	if (PageUptodate(page))
		goto got_it;

	if (dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_SIZE);
		if (!PageUptodate(page))
			SetPageUptodate(page);
	} else {
		f2fs_put_page(page, 1);

		/* if ipage exists, blkaddr should be NEW_ADDR */
		f2fs_bug_on(F2FS_I_SB(inode), ipage);
		page = f2fs_get_lock_data_page(inode, index, true);
		if (IS_ERR(page))
			return page;
	}
got_it:
	if (new_i_size && i_size_read(inode) <
				((loff_t)(index + 1) << PAGE_SHIFT))
		f2fs_i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT));
	return page;
}

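/* Allocate one new data block for dn (out-of-place) and record it in the dnode. */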
static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	struct f2fs_summary sum;
	struct node_info ni;
	block_t old_blkaddr;
	blkcnt_t count = 1;
	int err;

	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
		return -EPERM;

	err = f2fs_get_node_info(sbi, dn->nid, &ni);
	if (err)
		return err;

	dn->data_blkaddr = datablock_addr(dn->inode,
				dn->node_page, dn->ofs_in_node);
	if (dn->data_blkaddr != NULL_ADDR)
		goto alloc;

	if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
		return err;

alloc:
	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
	old_blkaddr = dn->data_blkaddr;
	f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
					&sum, seg_type, NULL, false);
	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
		invalidate_mapping_pages(META_MAPPING(sbi),
					old_blkaddr, old_blkaddr);
	f2fs_set_data_blkaddr(dn);

	/*
	 * i_size will be updated by direct_IO. Otherwise, we'll get stale
	 * data from unwritten block via dio_read.
	 */
	return 0;
}

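/* Preallocate blocks for a buffered or direct write, based on the iocb's range. */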
int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct f2fs_map_blocks map;
	int flag;
	int err = 0;
	bool direct_io = iocb->ki_flags & IOCB_DIRECT;

	/* convert inline data for Direct I/O */
	if (direct_io) {
		err = f2fs_convert_inline_inode(inode);
		if (err)
			return err;
	}

	if (direct_io && allow_outplace_dio(inode, iocb, from))
		return 0;

	if (is_inode_flag_set(inode, FI_NO_PREALLOC))
		return 0;

	map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
	map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from));
	if (map.m_len > map.m_lblk)
		map.m_len -= map.m_lblk;
	else
		map.m_len = 0;

	map.m_next_pgofs = NULL;
	map.m_next_extent = NULL;
	map.m_seg_type = NO_CHECK_TYPE;
	map.m_may_create = true;

	if (direct_io) {
		map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint);
		flag = f2fs_force_buffered_io(inode, iocb, from) ?
					F2FS_GET_BLOCK_PRE_AIO :
					F2FS_GET_BLOCK_PRE_DIO;
		goto map_blocks;
	}
	if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) {
		err = f2fs_convert_inline_inode(inode);
		if (err)
			return err;
	}
	if (f2fs_has_inline_data(inode))
		return err;

	flag = F2FS_GET_BLOCK_PRE_AIO;

map_blocks:
	err = f2fs_map_blocks(inode, &map, 1, flag);
	if (map.m_len > 0 && err == -ENOSPC) {
		if (!direct_io)
			set_inode_flag(inode, FI_NO_PREALLOC);
		err = 0;
	}
	return err;
}

void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
{
	if (flag == F2FS_GET_BLOCK_PRE_AIO) {
		if (lock)
			down_read(&sbi->node_change);
		else
			up_read(&sbi->node_change);
	} else {
		if (lock)
			f2fs_lock_op(sbi);
		else
			f2fs_unlock_op(sbi);
	}
}

/*
 * f2fs_map_blocks() now supports readahead/bmap/rw direct_IO with
 * f2fs_map_blocks structure.
 * If original data blocks are allocated, then give them to blockdev.
 * Otherwise,
 *     a. preallocate requested block addresses
 *     b. do not use extent cache for better performance
 *     c. give the block addresses to blockdev
 */
int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
						int create, int flag)
{
	unsigned int maxblocks = map->m_len;
	struct dnode_of_data dn;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	int mode = map->m_may_create ? ALLOC_NODE : LOOKUP_NODE;
	pgoff_t pgofs, end_offset, end;
	int err = 0, ofs = 1;
	unsigned int ofs_in_node, last_ofs_in_node;
	blkcnt_t prealloc;
	struct extent_info ei = {0,0,0};
	block_t blkaddr;
	unsigned int start_pgofs;

	if (!maxblocks)
		return 0;

	map->m_len = 0;
	map->m_flags = 0;

	/* it only supports block size == page size */
	pgofs =	(pgoff_t)map->m_lblk;
	end = pgofs + maxblocks;

	if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
		if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO &&
							map->m_may_create)
			goto next_dnode;

		map->m_pblk = ei.blk + pgofs - ei.fofs;
		map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
		map->m_flags = F2FS_MAP_MAPPED;
		if (map->m_next_extent)
			*map->m_next_extent = pgofs + map->m_len;

		/* for hardware encryption, but to avoid potential issue in future */
		if (flag == F2FS_GET_BLOCK_DIO)
			f2fs_wait_on_block_writeback_range(inode,
						map->m_pblk, map->m_len);
		goto out;
	}

next_dnode:
	if (map->m_may_create)
		__do_map_lock(sbi, flag, true);

	/* When reading holes, we need its node page */
	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_get_dnode_of_data(&dn, pgofs, mode);
	if (err) {
		if (flag == F2FS_GET_BLOCK_BMAP)
			map->m_pblk = 0;
		if (err == -ENOENT) {
			err = 0;
			if (map->m_next_pgofs)
				*map->m_next_pgofs =
					f2fs_get_next_page_offset(&dn, pgofs);
			if (map->m_next_extent)
				*map->m_next_extent =
					f2fs_get_next_page_offset(&dn, pgofs);
		}
		goto unlock_out;
	}

	start_pgofs = pgofs;
	prealloc = 0;
	last_ofs_in_node = ofs_in_node = dn.ofs_in_node;
	end_offset = ADDRS_PER_PAGE(dn.node_page, inode);

next_block:
	blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node);

	if (__is_valid_data_blkaddr(blkaddr) &&
		!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
		err = -EFAULT;
		goto sync_out;
	}

	if (is_valid_data_blkaddr(sbi, blkaddr)) {
		/* use out-place-update for direct IO under LFS mode */
		if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO &&
							map->m_may_create) {
			err = __allocate_data_block(&dn, map->m_seg_type);
			if (!err) {
				blkaddr = dn.data_blkaddr;
				set_inode_flag(inode, FI_APPEND_WRITE);
			}
		}
	} else {
		if (create) {
			if (unlikely(f2fs_cp_error(sbi))) {
				err = -EIO;
				goto sync_out;
			}
			if (flag == F2FS_GET_BLOCK_PRE_AIO) {
				if (blkaddr == NULL_ADDR) {
					prealloc++;
					last_ofs_in_node = dn.ofs_in_node;
				}
			} else {
				WARN_ON(flag != F2FS_GET_BLOCK_PRE_DIO &&
					flag != F2FS_GET_BLOCK_DIO);
				err = __allocate_data_block(&dn,
							map->m_seg_type);
				if (!err)
					set_inode_flag(inode, FI_APPEND_WRITE);
			}
			if (err)
				goto sync_out;
			map->m_flags |= F2FS_MAP_NEW;
			blkaddr = dn.data_blkaddr;
		} else {
			if (flag == F2FS_GET_BLOCK_BMAP) {
				map->m_pblk = 0;
				goto sync_out;
			}
			if (flag == F2FS_GET_BLOCK_PRECACHE)
				goto sync_out;
			if (flag == F2FS_GET_BLOCK_FIEMAP &&
						blkaddr == NULL_ADDR) {
				if (map->m_next_pgofs)
					*map->m_next_pgofs = pgofs + 1;
				goto sync_out;
			}
			if (flag != F2FS_GET_BLOCK_FIEMAP) {
				/* for defragment case */
				if (map->m_next_pgofs)
					*map->m_next_pgofs = pgofs + 1;
				goto sync_out;
			}
		}
	}

	if (flag == F2FS_GET_BLOCK_PRE_AIO)
		goto skip;

	if (map->m_len == 0) {
		/* preallocated unwritten block should be mapped for fiemap. */
		if (blkaddr == NEW_ADDR)
			map->m_flags |= F2FS_MAP_UNWRITTEN;
		map->m_flags |= F2FS_MAP_MAPPED;

		map->m_pblk = blkaddr;
		map->m_len = 1;
	} else if ((map->m_pblk != NEW_ADDR &&
			blkaddr == (map->m_pblk + ofs)) ||
			(map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
			flag == F2FS_GET_BLOCK_PRE_DIO) {
		ofs++;
		map->m_len++;
	} else {
		goto sync_out;
	}

skip:
	dn.ofs_in_node++;
	pgofs++;

	/* preallocate blocks in batch for one dnode page */
	if (flag == F2FS_GET_BLOCK_PRE_AIO &&
			(pgofs == end || dn.ofs_in_node == end_offset)) {

		dn.ofs_in_node = ofs_in_node;
		err = f2fs_reserve_new_blocks(&dn, prealloc);
		if (err)
			goto sync_out;

		map->m_len += dn.ofs_in_node - ofs_in_node;
		if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
			err = -ENOSPC;
			goto sync_out;
		}
		dn.ofs_in_node = end_offset;
	}

	if (pgofs >= end)
		goto sync_out;
	else if (dn.ofs_in_node < end_offset)
		goto next_block;

	if (flag == F2FS_GET_BLOCK_PRECACHE) {
		if (map->m_flags & F2FS_MAP_MAPPED) {
			unsigned int ofs = start_pgofs - map->m_lblk;

			f2fs_update_extent_cache_range(&dn,
				start_pgofs, map->m_pblk + ofs,
				map->m_len - ofs);
		}
	}

	f2fs_put_dnode(&dn);

	if (map->m_may_create) {
		__do_map_lock(sbi, flag, false);
		f2fs_balance_fs(sbi, dn.node_changed);
	}
	goto next_dnode;

sync_out:

	/* for hardware encryption, but to avoid potential issue in future */
	if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED)
		f2fs_wait_on_block_writeback_range(inode,
						map->m_pblk, map->m_len);

	if (flag == F2FS_GET_BLOCK_PRECACHE) {
		if (map->m_flags & F2FS_MAP_MAPPED) {
			unsigned int ofs = start_pgofs - map->m_lblk;

			f2fs_update_extent_cache_range(&dn,
				start_pgofs, map->m_pblk + ofs,
				map->m_len - ofs);
		}
		if (map->m_next_extent)
			*map->m_next_extent = pgofs + 1;
	}
	f2fs_put_dnode(&dn);
unlock_out:
	if (map->m_may_create) {
		__do_map_lock(sbi, flag, false);
		f2fs_balance_fs(sbi, dn.node_changed);
	}
out:
	trace_f2fs_map_blocks(inode, map, err);
	return err;
}

bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len)
{
	struct f2fs_map_blocks map;
	block_t last_lblk;
	int err;

	if (pos + len > i_size_read(inode))
		return false;

	map.m_lblk = F2FS_BYTES_TO_BLK(pos);
	map.m_next_pgofs = NULL;
	map.m_next_extent = NULL;
	map.m_seg_type = NO_CHECK_TYPE;
	map.m_may_create = false;
	last_lblk = F2FS_BLK_ALIGN(pos + len);

	while (map.m_lblk < last_lblk) {
		map.m_len = last_lblk - map.m_lblk;
		err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT);
		if (err || map.m_len == 0)
			return false;
		map.m_lblk += map.m_len;
	}
	return true;
}

static int __get_data_block(struct inode *inode, sector_t iblock,
			struct buffer_head *bh, int create, int flag,
			pgoff_t *next_pgofs, int seg_type, bool may_write)
{
	struct f2fs_map_blocks map;
	int err;

	map.m_lblk = iblock;
	map.m_len = bh->b_size >> inode->i_blkbits;
	map.m_next_pgofs = next_pgofs;
	map.m_next_extent = NULL;
	map.m_seg_type = seg_type;
	map.m_may_create = may_write;

	err = f2fs_map_blocks(inode, &map, create, flag);
	if (!err) {
		map_bh(bh, inode->i_sb, map.m_pblk);
		bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
		bh->b_size = (u64)map.m_len << inode->i_blkbits;
	}
	return err;
}

static int get_data_block(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create, int flag,
			pgoff_t *next_pgofs)
{
	return __get_data_block(inode, iblock, bh_result, create,
							flag, next_pgofs,
							NO_CHECK_TYPE, create);
}

static int get_data_block_dio_write(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create)
{
	return __get_data_block(inode, iblock, bh_result, create,
				F2FS_GET_BLOCK_DIO, NULL,
				f2fs_rw_hint_to_seg_type(inode->i_write_hint),
				true);
}

static int get_data_block_dio(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create)
{
	return __get_data_block(inode, iblock, bh_result, create,
				F2FS_GET_BLOCK_DIO, NULL,
				f2fs_rw_hint_to_seg_type(inode->i_write_hint),
				false);
}

static int get_data_block_bmap(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create)
{
	/* Block number less than F2FS MAX BLOCKS */
	if (unlikely(iblock >= F2FS_I_SB(inode)->max_file_blocks))
		return -EFBIG;

	return __get_data_block(inode, iblock, bh_result, create,
						F2FS_GET_BLOCK_BMAP, NULL,
						NO_CHECK_TYPE, create);
}

static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
{
	return (offset >> inode->i_blkbits);
}

static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
{
	return (blk << inode->i_blkbits);
}

static int f2fs_xattr_fiemap(struct inode *inode,
				struct fiemap_extent_info *fieinfo)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct page *page;
	struct node_info ni;
	__u64 phys = 0, len;
	__u32 flags;
	nid_t xnid = F2FS_I(inode)->i_xattr_nid;
	int err = 0;

	if (f2fs_has_inline_xattr(inode)) {
		int offset;

		page = f2fs_grab_cache_page(NODE_MAPPING(sbi),
						inode->i_ino, false);
		if (!page)
			return -ENOMEM;

		err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
		if (err) {
			f2fs_put_page(page, 1);
			return err;
		}

		phys = (__u64)blk_to_logical(inode, ni.blk_addr);
		offset = offsetof(struct f2fs_inode, i_addr) +
					sizeof(__le32) * (DEF_ADDRS_PER_INODE -
					get_inline_xattr_addrs(inode));

		phys += offset;
		len = inline_xattr_size(inode);

		f2fs_put_page(page, 1);

		flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED;

		if (!xnid)
			flags |= FIEMAP_EXTENT_LAST;

		err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
		if (err || err == 1)
			return err;
	}

	if (xnid) {
		page = f2fs_grab_cache_page(NODE_MAPPING(sbi), xnid, false);
		if (!page)
			return -ENOMEM;

		err = f2fs_get_node_info(sbi, xnid, &ni);
		if (err) {
			f2fs_put_page(page, 1);
			return err;
		}

		phys = (__u64)blk_to_logical(inode, ni.blk_addr);
		len = inode->i_sb->s_blocksize;

		f2fs_put_page(page, 1);

		flags = FIEMAP_EXTENT_LAST;
	}

	if (phys)
		err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);

	return (err < 0 ? err : 0);
}

int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		u64 start, u64 len)
{
	struct buffer_head map_bh;
	sector_t start_blk, last_blk;
	pgoff_t next_pgofs;
	u64 logical = 0, phys = 0, size = 0;
	u32 flags = 0;
	int ret = 0;

	if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
		ret = f2fs_precache_extents(inode);
		if (ret)
			return ret;
	}

	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR);
	if (ret)
		return ret;

	inode_lock(inode);

	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
		ret = f2fs_xattr_fiemap(inode, fieinfo);
		goto out;
	}

	if (f2fs_has_inline_data(inode)) {
		ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
		if (ret != -EAGAIN)
			goto out;
	}

	if (logical_to_blk(inode, len) == 0)
		len = blk_to_logical(inode, 1);

	start_blk = logical_to_blk(inode, start);
	last_blk = logical_to_blk(inode, start + len - 1);

next:
	memset(&map_bh, 0, sizeof(struct buffer_head));
	map_bh.b_size = len;

	ret = get_data_block(inode, start_blk, &map_bh, 0,
					F2FS_GET_BLOCK_FIEMAP, &next_pgofs);
	if (ret)
		goto out;

	/* HOLE */
	if (!buffer_mapped(&map_bh)) {
		start_blk = next_pgofs;

		if (blk_to_logical(inode, start_blk) < blk_to_logical(inode,
					F2FS_I_SB(inode)->max_file_blocks))
			goto prep_next;

		flags |= FIEMAP_EXTENT_LAST;
	}

	if (size) {
		if (f2fs_encrypted_inode(inode))
			flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;

		ret = fiemap_fill_next_extent(fieinfo, logical,
				phys, size, flags);
	}

	if (start_blk > last_blk || ret)
		goto out;

	logical = blk_to_logical(inode, start_blk);
	phys = blk_to_logical(inode, map_bh.b_blocknr);
	size = map_bh.b_size;
	flags = 0;
	if (buffer_unwritten(&map_bh))
		flags = FIEMAP_EXTENT_UNWRITTEN;

	start_blk += logical_to_blk(inode, size);

prep_next:
	cond_resched();
	if (fatal_signal_pending(current))
		ret = -EINTR;
	else
		goto next;
out:
	if (ret == 1)
		ret = 0;

	inode_unlock(inode);
	return ret;
}

/*
 * This function was originally taken from fs/mpage.c, and customized for f2fs.
 * Major change was from block_size == page_size in f2fs by default.
 *
 * Note that the aops->readpages() function is ONLY used for read-ahead. If
 * this function ever deviates from doing just read-ahead, it should either
 * use ->readpage() or do the necessary surgery to decouple ->readpages()
 * from read-ahead.
 */
static int f2fs_mpage_readpages(struct address_space *mapping,
			struct list_head *pages, struct page *page,
			unsigned nr_pages, bool is_readahead)
{
	struct bio *bio = NULL;
	sector_t last_block_in_bio = 0;
	struct inode *inode = mapping->host;
	const unsigned blkbits = inode->i_blkbits;
	const unsigned blocksize = 1 << blkbits;
	sector_t block_in_file;
	sector_t last_block;
	sector_t last_block_in_file;
	sector_t block_nr;
	struct f2fs_map_blocks map;

	map.m_pblk = 0;
	map.m_lblk = 0;
	map.m_len = 0;
	map.m_flags = 0;
	map.m_next_pgofs = NULL;
	map.m_next_extent = NULL;
	map.m_seg_type = NO_CHECK_TYPE;
	map.m_may_create = false;

	for (; nr_pages; nr_pages--) {
		if (pages) {
			page = list_last_entry(pages, struct page, lru);

			prefetchw(&page->flags);
			list_del(&page->lru);
			if (add_to_page_cache_lru(page, mapping,
						  page->index,
						  readahead_gfp_mask(mapping)))
				goto next_page;
		}

		block_in_file = (sector_t)page->index;
		last_block = block_in_file + nr_pages;
		last_block_in_file = (i_size_read(inode) + blocksize - 1) >>
								blkbits;
		if (last_block > last_block_in_file)
			last_block = last_block_in_file;

		/*
		 * Map blocks using the previous result first.
		 */
		if ((map.m_flags & F2FS_MAP_MAPPED) &&
				block_in_file > map.m_lblk &&
				block_in_file < (map.m_lblk + map.m_len))
			goto got_it;

		/*
		 * Then do more f2fs_map_blocks() calls until we are
		 * done with this page.
		 */
		map.m_flags = 0;

		if (block_in_file < last_block) {
			map.m_lblk = block_in_file;
			map.m_len = last_block - block_in_file;

			if (f2fs_map_blocks(inode, &map, 0,
						F2FS_GET_BLOCK_DEFAULT))
				goto set_error_page;
		}
got_it:
		if ((map.m_flags & F2FS_MAP_MAPPED)) {
			block_nr = map.m_pblk + block_in_file - map.m_lblk;
			SetPageMappedToDisk(page);

			if (!PageUptodate(page) && !cleancache_get_page(page)) {
				SetPageUptodate(page);
				goto confused;
			}

			if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
								DATA_GENERIC))
				goto set_error_page;
		} else {
			zero_user_segment(page, 0, PAGE_SIZE);
			if (!PageUptodate(page))
				SetPageUptodate(page);
			unlock_page(page);
			goto next_page;
		}

		/*
		 * This page will go to BIO.  Do we need to send this
		 * BIO off first?
		 */
		if (bio && (last_block_in_bio != block_nr - 1 ||
			!__same_bdev(F2FS_I_SB(inode), block_nr, bio))) {
submit_and_realloc:
			__submit_bio(F2FS_I_SB(inode), bio, DATA);
			bio = NULL;
		}
		if (bio == NULL) {
			bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
					is_readahead ? REQ_RAHEAD : 0);
			if (IS_ERR(bio)) {
				bio = NULL;
				goto set_error_page;
			}
		}

		/*
		 * If the page is under writeback, we need to wait for
		 * its completion to see the correct decrypted data.
		 */
		f2fs_wait_on_block_writeback(inode, block_nr);

		if (bio_add_page(bio, page, blocksize, 0) < blocksize)
			goto submit_and_realloc;

		inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
		ClearPageError(page);
		last_block_in_bio = block_nr;
		goto next_page;
set_error_page:
		SetPageError(page);
		zero_user_segment(page, 0, PAGE_SIZE);
		unlock_page(page);
		goto next_page;
confused:
		if (bio) {
			__submit_bio(F2FS_I_SB(inode), bio, DATA);
			bio = NULL;
		}
		unlock_page(page);
next_page:
		if (pages)
			put_page(page);
	}
	BUG_ON(pages && !list_empty(pages));
	if (bio)
		__submit_bio(F2FS_I_SB(inode), bio, DATA);
	return 0;
}

static int f2fs_read_data_page(struct file *file, struct page *page)
{
	struct inode *inode = page->mapping->host;
	int ret = -EAGAIN;

	trace_f2fs_readpage(page, DATA);

	/* If the file has inline data, try to read it directly */
	if (f2fs_has_inline_data(inode))
		ret = f2fs_read_inline_data(inode, page);
	if (ret == -EAGAIN)
		ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1, false);
	return ret;
}

static int f2fs_read_data_pages(struct file *file,
			struct address_space *mapping,
			struct list_head *pages, unsigned nr_pages)
{
	struct inode *inode = mapping->host;
	struct page *page = list_last_entry(pages, struct page, lru);

	trace_f2fs_readpages(inode, page, nr_pages);

	/* If the file has inline data, skip readpages */
	if (f2fs_has_inline_data(inode))
		return 0;

	return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages, true);
}

static int encrypt_one_page(struct f2fs_io_info *fio)
{
	struct inode *inode = fio->page->mapping->host;
	struct page *mpage;
	gfp_t gfp_flags = GFP_NOFS;

	if (!f2fs_encrypted_file(inode))
		return 0;

	/* wait for GCed page writeback via META_MAPPING */
	f2fs_wait_on_block_writeback(inode, fio->old_blkaddr);

retry_encrypt:
	fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page,
			PAGE_SIZE, 0, fio->page->index, gfp_flags);
	if (IS_ERR(fio->encrypted_page)) {
		/* flush pending IOs and wait for a while in the ENOMEM case */
		if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
			f2fs_flush_merged_writes(fio->sbi);
			congestion_wait(BLK_RW_ASYNC, HZ/50);
			gfp_flags |= __GFP_NOFAIL;
			goto retry_encrypt;
		}
		return PTR_ERR(fio->encrypted_page);
	}

	mpage = find_lock_page(META_MAPPING(fio->sbi), fio->old_blkaddr);
	if (mpage) {
		if (PageUptodate(mpage))
			memcpy(page_address(mpage),
				page_address(fio->encrypted_page), PAGE_SIZE);
		f2fs_put_page(mpage, 1);
	}
	return 0;
}

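/* Evaluate the mount-time IPU policy bits to decide if in-place update is allowed. */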
static inline bool check_inplace_update_policy(struct inode *inode,
				struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	unsigned int policy = SM_I(sbi)->ipu_policy;

	if (policy & (0x1 << F2FS_IPU_FORCE))
		return true;
	if (policy & (0x1 << F2FS_IPU_SSR) && f2fs_need_SSR(sbi))
		return true;
	if (policy & (0x1 << F2FS_IPU_UTIL) &&
			utilization(sbi) > SM_I(sbi)->min_ipu_util)
		return true;
	if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && f2fs_need_SSR(sbi) &&
			utilization(sbi) > SM_I(sbi)->min_ipu_util)
		return true;

	/*
	 * IPU for rewrite async pages
	 */
	if (policy & (0x1 << F2FS_IPU_ASYNC) &&
			fio && fio->op == REQ_OP_WRITE &&
			!(fio->op_flags & REQ_SYNC) &&
			!f2fs_encrypted_inode(inode))
		return true;

	/* this is only set during fdatasync */
	if (policy & (0x1 << F2FS_IPU_FSYNC) &&
			is_inode_flag_set(inode, FI_NEED_IPU))
		return true;

	if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
			!f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
		return true;

	return false;
}

bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio)
{
	if (f2fs_is_pinned_file(inode))
		return true;

	/* if this is cold file, we should overwrite to avoid fragmentation */
	if (file_is_cold(inode))
		return true;

	return check_inplace_update_policy(inode, fio);
}

bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	if (test_opt(sbi, LFS))
		return true;
	if (S_ISDIR(inode->i_mode))
		return true;
	if (IS_NOQUOTA(inode))
		return true;
	if (f2fs_is_atomic_file(inode))
		return true;
	if (fio) {
		if (is_cold_data(fio->page))
			return true;
		if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
			return true;
		if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
			f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
			return true;
	}
	return false;
}

static inline bool need_inplace_update(struct f2fs_io_info *fio)
{
	struct inode *inode = fio->page->mapping->host;

	if (f2fs_should_update_outplace(inode, fio))
		return false;

	return f2fs_should_update_inplace(inode, fio);
}

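/* Write one data page, choosing between in-place update (IPU) and out-of-place update (OPU). */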
int f2fs_do_write_data_page(struct f2fs_io_info *fio)
{
	struct page *page = fio->page;
	struct inode *inode = page->mapping->host;
	struct dnode_of_data dn;
	struct extent_info ei = {0,0,0};
	struct node_info ni;
	bool ipu_force = false;
	int err = 0;

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	if (need_inplace_update(fio) &&
			f2fs_lookup_extent_cache(inode, page->index, &ei)) {
		fio->old_blkaddr = ei.blk + page->index - ei.fofs;

		if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
							DATA_GENERIC))
			return -EFAULT;

		ipu_force = true;
		fio->need_lock = LOCK_DONE;
		goto got_it;
	}

	/* avoid deadlock between page->lock and f2fs_lock_op() */
	if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi))
		return -EAGAIN;

	err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
	if (err)
		goto out;

	fio->old_blkaddr = dn.data_blkaddr;

	/* This page is already truncated */
	if (fio->old_blkaddr == NULL_ADDR) {
		ClearPageUptodate(page);
		clear_cold_data(page);
		goto out_writepage;
	}
got_it:
	if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
		!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
							DATA_GENERIC)) {
		err = -EFAULT;
		goto out_writepage;
	}
1850 1851 1852 1853
	/*
	 * If current allocation needs SSR,
	 * it had better in-place writes for updated data.
	 */
1854
	if (ipu_force || (is_valid_data_blkaddr(fio->sbi, fio->old_blkaddr) &&
C
Chao Yu 已提交
1855
					need_inplace_update(fio))) {
1856 1857 1858 1859 1860
		err = encrypt_one_page(fio);
		if (err)
			goto out_writepage;

		set_page_writeback(page);
J
Jaegeuk Kim 已提交
1861
		ClearPageError(page);
1862
		f2fs_put_dnode(&dn);
1863
		if (fio->need_lock == LOCK_REQ)
1864
			f2fs_unlock_op(fio->sbi);
C
Chao Yu 已提交
1865
		err = f2fs_inplace_write_data(fio);
1866 1867
		if (err && PageWriteback(page))
			end_page_writeback(page);
1868
		trace_f2fs_do_write_data_page(fio->page, IPU);
1869
		set_inode_flag(inode, FI_UPDATE_WRITE);
1870
		return err;
1871
	}
1872

1873 1874 1875 1876 1877 1878 1879 1880
	if (fio->need_lock == LOCK_RETRY) {
		if (!f2fs_trylock_op(fio->sbi)) {
			err = -EAGAIN;
			goto out_writepage;
		}
		fio->need_lock = LOCK_REQ;
	}

1881 1882 1883 1884 1885 1886
	err = f2fs_get_node_info(fio->sbi, dn.nid, &ni);
	if (err)
		goto out_writepage;

	fio->version = ni.version;

1887 1888 1889 1890 1891
	err = encrypt_one_page(fio);
	if (err)
		goto out_writepage;

	set_page_writeback(page);
J
Jaegeuk Kim 已提交
1892
	ClearPageError(page);
1893

1894
	/* LFS mode write path */
C
Chao Yu 已提交
1895
	f2fs_outplace_write_data(&dn, fio);
1896 1897 1898 1899
	trace_f2fs_do_write_data_page(page, OPU);
	set_inode_flag(inode, FI_APPEND_WRITE);
	if (page->index == 0)
		set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
1900 1901
out_writepage:
	f2fs_put_dnode(&dn);
1902
out:
1903
	if (fio->need_lock == LOCK_REQ)
1904
		f2fs_unlock_op(fio->sbi);
1905 1906 1907
	return err;
}

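/*
 * Per-page writeback worker shared by ->writepage and ->writepages; it
 * handles zeroing beyond EOF, inline data, and redirtying on errors.
 * *submitted reports whether a bio was actually issued for this page.
 */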
static int __write_data_page(struct page *page, bool *submitted,
				struct writeback_control *wbc,
				enum iostat_type io_type)
{
	struct inode *inode = page->mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	loff_t i_size = i_size_read(inode);
	const pgoff_t end_index = ((unsigned long long) i_size)
							>> PAGE_SHIFT;
	loff_t psize = (page->index + 1) << PAGE_SHIFT;
	unsigned offset = 0;
	bool need_balance_fs = false;
	int err = 0;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.ino = inode->i_ino,
		.type = DATA,
		.op = REQ_OP_WRITE,
		.op_flags = wbc_to_write_flags(wbc),
		.old_blkaddr = NULL_ADDR,
		.page = page,
		.encrypted_page = NULL,
		.submitted = false,
		.need_lock = LOCK_RETRY,
		.io_type = io_type,
		.io_wbc = wbc,
	};

	trace_f2fs_writepage(page, DATA);

	/* we should bypass data pages to let the kworker jobs proceed */
	if (unlikely(f2fs_cp_error(sbi))) {
		mapping_set_error(page->mapping, -EIO);
		/*
		 * don't drop any dirty dentry pages, to keep the latest
		 * directory structure.
		 */
		if (S_ISDIR(inode->i_mode))
			goto redirty_out;
		goto out;
	}

	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto redirty_out;

	if (page->index < end_index)
		goto write;

	/*
	 * If the offset is out-of-range of file size,
	 * this page does not have to be written to disk.
	 */
	offset = i_size & (PAGE_SIZE - 1);
	if ((page->index >= end_index + 1) || !offset)
		goto out;

	zero_user_segment(page, offset, PAGE_SIZE);
write:
	if (f2fs_is_drop_cache(inode))
		goto out;
	/* we should not write the 0'th page, which holds the journal header */
	if (f2fs_is_volatile_file(inode) && (!page->index ||
			(!wbc->for_reclaim &&
			f2fs_available_free_memory(sbi, BASE_CHECK))))
		goto redirty_out;

	/* Dentry blocks are controlled by checkpoint */
	if (S_ISDIR(inode->i_mode)) {
		fio.need_lock = LOCK_DONE;
		err = f2fs_do_write_data_page(&fio);
		goto done;
	}

	if (!wbc->for_reclaim)
		need_balance_fs = true;
	else if (has_not_enough_free_secs(sbi, 0, 0))
		goto redirty_out;
	else
		set_inode_flag(inode, FI_HOT_DATA);

	err = -EAGAIN;
	if (f2fs_has_inline_data(inode)) {
		err = f2fs_write_inline_data(inode, page);
		if (!err)
			goto out;
	}

	if (err == -EAGAIN) {
		err = f2fs_do_write_data_page(&fio);
		if (err == -EAGAIN) {
			fio.need_lock = LOCK_REQ;
			err = f2fs_do_write_data_page(&fio);
		}
	}

	if (err) {
		file_set_keep_isize(inode);
	} else {
		down_write(&F2FS_I(inode)->i_sem);
		if (F2FS_I(inode)->last_disk_size < psize)
			F2FS_I(inode)->last_disk_size = psize;
		up_write(&F2FS_I(inode)->i_sem);
	}

done:
	if (err && err != -ENOENT)
		goto redirty_out;

out:
	inode_dec_dirty_pages(inode);
	if (err) {
		ClearPageUptodate(page);
		clear_cold_data(page);
	}

	if (wbc->for_reclaim) {
		f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA);
		clear_inode_flag(inode, FI_HOT_DATA);
		f2fs_remove_dirty_inode(inode);
		submitted = NULL;
	}

	unlock_page(page);
	if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode))
		f2fs_balance_fs(sbi, need_balance_fs);

	if (unlikely(f2fs_cp_error(sbi))) {
		f2fs_submit_merged_write(sbi, DATA);
		submitted = NULL;
	}

	if (submitted)
		*submitted = fio.submitted;

	return 0;

redirty_out:
	redirty_page_for_writepage(wbc, page);
	/*
	 * pageout() in MM translates EAGAIN, so it calls handle_write_error()
	 * -> mapping_set_error() -> set_bit(AS_EIO, ...).
	 * file_write_and_wait_range() will see the EIO error, which is
	 * critical for fsync() to return an atomic-write failure to the user.
	 */
	if (!err || wbc->for_reclaim)
		return AOP_WRITEPAGE_ACTIVATE;
	unlock_page(page);
	return err;
}

static int f2fs_write_data_page(struct page *page,
					struct writeback_control *wbc)
{
	return __write_data_page(page, NULL, wbc, FS_DATA_IO);
}

/*
 * This function was copied from write_cache_pages in mm/page-writeback.c.
 * The major change is making the write step for cold data pages separate
 * from warm/hot data pages.
 */
static int f2fs_write_cache_pages(struct address_space *mapping,
					struct writeback_control *wbc,
					enum iostat_type io_type)
{
	int ret = 0;
	int done = 0;
	struct pagevec pvec;
	struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
	int nr_pages;
	pgoff_t uninitialized_var(writeback_index);
	pgoff_t index;
	pgoff_t end;		/* Inclusive */
	pgoff_t done_index;
	int cycled;
	int range_whole = 0;
	xa_mark_t tag;
	int nwritten = 0;

	pagevec_init(&pvec);

	if (get_dirty_pages(mapping->host) <=
				SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
		set_inode_flag(mapping->host, FI_HOT_DATA);
	else
		clear_inode_flag(mapping->host, FI_HOT_DATA);

	if (wbc->range_cyclic) {
		writeback_index = mapping->writeback_index; /* prev offset */
		index = writeback_index;
		if (index == 0)
			cycled = 1;
		else
			cycled = 0;
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_SHIFT;
		end = wbc->range_end >> PAGE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
		cycled = 1; /* ignore range_cyclic tests */
	}
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag = PAGECACHE_TAG_TOWRITE;
	else
		tag = PAGECACHE_TAG_DIRTY;
retry:
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag_pages_for_writeback(mapping, index, end);
	done_index = index;
	while (!done && (index <= end)) {
		int i;

		nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
				tag);
		if (nr_pages == 0)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];
			bool submitted = false;

			/* give priority to WB_SYNC threads */
			if (atomic_read(&sbi->wb_sync_req[DATA]) &&
					wbc->sync_mode == WB_SYNC_NONE) {
				done = 1;
				break;
			}

			done_index = page->index;
retry_write:
			lock_page(page);

			if (unlikely(page->mapping != mapping)) {
continue_unlock:
				unlock_page(page);
				continue;
			}

			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			if (PageWriteback(page)) {
				if (wbc->sync_mode != WB_SYNC_NONE)
					f2fs_wait_on_page_writeback(page,
							DATA, true, true);
				else
					goto continue_unlock;
			}

			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			ret = __write_data_page(page, &submitted, wbc, io_type);
			if (unlikely(ret)) {
				/*
				 * keep nr_to_write, since vfs uses this to
				 * get # of written pages.
				 */
				if (ret == AOP_WRITEPAGE_ACTIVATE) {
					unlock_page(page);
					ret = 0;
					continue;
				} else if (ret == -EAGAIN) {
					ret = 0;
					if (wbc->sync_mode == WB_SYNC_ALL) {
						cond_resched();
						congestion_wait(BLK_RW_ASYNC,
									HZ/50);
						goto retry_write;
					}
					continue;
				}
				done_index = page->index + 1;
				done = 1;
				break;
			} else if (submitted) {
				nwritten++;
			}

			if (--wbc->nr_to_write <= 0 &&
					wbc->sync_mode == WB_SYNC_NONE) {
				done = 1;
				break;
			}
		}
		pagevec_release(&pvec);
		cond_resched();
	}

	if (!cycled && !done) {
		cycled = 1;
		index = 0;
		end = writeback_index - 1;
		goto retry;
	}
	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = done_index;

	if (nwritten)
		f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host,
								NULL, 0, DATA);

	return ret;
}

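/*
 * Decide whether writepages() calls should be serialized under
 * sbi->writepages; serializing is intended to keep per-inode block
 * allocation mostly contiguous under concurrent writeback.
 */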
static inline bool __should_serialize_io(struct inode *inode,
					struct writeback_control *wbc)
{
	if (!S_ISREG(inode->i_mode))
		return false;
	if (IS_NOQUOTA(inode))
		return false;
	if (wbc->sync_mode != WB_SYNC_ALL)
		return true;
	if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
		return true;
	return false;
}

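/*
 * Common writepages path: filters out cases that can skip writeback
 * entirely, then issues the writes under a block plug, optionally
 * serialized against other writers.
 */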
static int __f2fs_write_data_pages(struct address_space *mapping,
						struct writeback_control *wbc,
						enum iostat_type io_type)
{
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct blk_plug plug;
	int ret;
	bool locked = false;

	/* deal with chardevs and other special files */
	if (!mapping->a_ops->writepage)
		return 0;

	/* skip writing if there is no dirty page in this inode */
	if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE)
		return 0;

	/* during POR, we don't need to trigger writepage at all. */
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto skip_write;

	if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) &&
			wbc->sync_mode == WB_SYNC_NONE &&
			get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
			f2fs_available_free_memory(sbi, DIRTY_DENTS))
		goto skip_write;

	/* skip writing during file defragmentation */
	if (is_inode_flag_set(inode, FI_DO_DEFRAG))
		goto skip_write;

	trace_f2fs_writepages(mapping->host, wbc, DATA);

	/* to avoid splitting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
	if (wbc->sync_mode == WB_SYNC_ALL)
		atomic_inc(&sbi->wb_sync_req[DATA]);
	else if (atomic_read(&sbi->wb_sync_req[DATA]))
		goto skip_write;

	if (__should_serialize_io(inode, wbc)) {
		mutex_lock(&sbi->writepages);
		locked = true;
	}

	blk_start_plug(&plug);
	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
	blk_finish_plug(&plug);

	if (locked)
		mutex_unlock(&sbi->writepages);

	if (wbc->sync_mode == WB_SYNC_ALL)
		atomic_dec(&sbi->wb_sync_req[DATA]);
	/*
	 * if some pages were truncated, we cannot rely on mapping->host
	 * to detect the pending bios.
	 */

	f2fs_remove_dirty_inode(inode);
	return ret;

skip_write:
	wbc->pages_skipped += get_dirty_pages(inode);
	trace_f2fs_writepages(mapping->host, wbc, DATA);
	return 0;
}

static int f2fs_write_data_pages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;

	return __f2fs_write_data_pages(mapping, wbc,
			F2FS_I(inode)->cp_task == current ?
			FS_CP_DATA_IO : FS_DATA_IO);
}

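/*
 * On a failed or short write, drop the pagecache beyond i_size and free any
 * blocks that were preallocated past it.
 */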
static void f2fs_write_failed(struct address_space *mapping, loff_t to)
{
	struct inode *inode = mapping->host;
	loff_t i_size = i_size_read(inode);

	if (to > i_size) {
		down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
		down_write(&F2FS_I(inode)->i_mmap_sem);

		truncate_pagecache(inode, i_size);
		f2fs_truncate_blocks(inode, i_size, true, true);

		up_write(&F2FS_I(inode)->i_mmap_sem);
		up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	}
}

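/*
 * Resolve the on-disk block for the page being written: read or convert
 * inline data if needed, and report whether the node page changed so the
 * caller knows a filesystem balance may be required.
 */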
static int prepare_write_begin(struct f2fs_sb_info *sbi,
			struct page *page, loff_t pos, unsigned len,
			block_t *blk_addr, bool *node_changed)
{
	struct inode *inode = page->mapping->host;
	pgoff_t index = page->index;
	struct dnode_of_data dn;
	struct page *ipage;
	bool locked = false;
	struct extent_info ei = {0,0,0};
	int err = 0;
	int flag;

	/*
	 * we already allocated all the blocks, so we don't need to get
	 * the block addresses when there is no need to fill the page.
	 */
	if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE &&
			!is_inode_flag_set(inode, FI_NO_PREALLOC))
		return 0;

	/* f2fs_lock_op avoids race between write CP and convert_inline_page */
	if (f2fs_has_inline_data(inode) && pos + len > MAX_INLINE_DATA(inode))
		flag = F2FS_GET_BLOCK_DEFAULT;
	else
		flag = F2FS_GET_BLOCK_PRE_AIO;

	if (f2fs_has_inline_data(inode) ||
			(pos & PAGE_MASK) >= i_size_read(inode)) {
		__do_map_lock(sbi, flag, true);
		locked = true;
	}
restart:
	/* check inline_data */
	ipage = f2fs_get_node_page(sbi, inode->i_ino);
	if (IS_ERR(ipage)) {
		err = PTR_ERR(ipage);
		goto unlock_out;
	}

	set_new_dnode(&dn, inode, ipage, ipage, 0);

	if (f2fs_has_inline_data(inode)) {
		if (pos + len <= MAX_INLINE_DATA(inode)) {
			f2fs_do_read_inline_data(page, ipage);
			set_inode_flag(inode, FI_DATA_EXIST);
			if (inode->i_nlink)
				set_inline_node(ipage);
		} else {
			err = f2fs_convert_inline_page(&dn, page);
			if (err)
				goto out;
			if (dn.data_blkaddr == NULL_ADDR)
				err = f2fs_get_block(&dn, index);
		}
	} else if (locked) {
		err = f2fs_get_block(&dn, index);
	} else {
		if (f2fs_lookup_extent_cache(inode, index, &ei)) {
			dn.data_blkaddr = ei.blk + index - ei.fofs;
		} else {
			/* hole case */
			err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
			if (err || dn.data_blkaddr == NULL_ADDR) {
				f2fs_put_dnode(&dn);
				__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
								true);
				WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO);
				locked = true;
				goto restart;
			}
		}
	}

	/* convert_inline_page can make node_changed */
	*blk_addr = dn.data_blkaddr;
	*node_changed = dn.node_changed;
out:
	f2fs_put_dnode(&dn);
unlock_out:
	if (locked)
		__do_map_lock(sbi, flag, false);
	return err;
}

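/*
 * ->write_begin: pin and prepare the target page; the page is read (or
 * zeroed) here so that ->write_end only needs to copy user data and mark
 * the page dirty.
 */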
static int f2fs_write_begin(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned flags,
		struct page **pagep, void **fsdata)
{
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct page *page = NULL;
	pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT;
	bool need_balance = false, drop_atomic = false;
	block_t blkaddr = NULL_ADDR;
	int err = 0;

	trace_f2fs_write_begin(inode, pos, len, flags);

	err = f2fs_is_checkpoint_ready(sbi);
	if (err)
		goto fail;

	if ((f2fs_is_atomic_file(inode) &&
			!f2fs_available_free_memory(sbi, INMEM_PAGES)) ||
			is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) {
		err = -ENOMEM;
		drop_atomic = true;
		goto fail;
	}

	/*
	 * We should check this at this moment to avoid deadlock on inode page
	 * and #0 page. The locking rule for inline_data conversion should be:
	 * lock_page(page #0) -> lock_page(inode_page)
	 */
	if (index != 0) {
		err = f2fs_convert_inline_inode(inode);
		if (err)
			goto fail;
	}
repeat:
	/*
	 * Do not use grab_cache_page_write_begin() here, to avoid a deadlock
	 * due to wait_for_stable_page(); we do that wait below, under our own
	 * IO control.
	 */
	page = f2fs_pagecache_get_page(mapping, index,
				FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS);
	if (!page) {
		err = -ENOMEM;
		goto fail;
	}

	*pagep = page;

	err = prepare_write_begin(sbi, page, pos, len,
					&blkaddr, &need_balance);
	if (err)
		goto fail;

	if (need_balance && !IS_NOQUOTA(inode) &&
			has_not_enough_free_secs(sbi, 0, 0)) {
		unlock_page(page);
		f2fs_balance_fs(sbi, true);
		lock_page(page);
		if (page->mapping != mapping) {
			/* The page got truncated from under us */
			f2fs_put_page(page, 1);
			goto repeat;
		}
	}

	f2fs_wait_on_page_writeback(page, DATA, false, true);

	if (len == PAGE_SIZE || PageUptodate(page))
		return 0;

	if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode)) {
		zero_user_segment(page, len, PAGE_SIZE);
		return 0;
	}

	if (blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_SIZE);
		SetPageUptodate(page);
	} else {
		err = f2fs_submit_page_read(inode, page, blkaddr);
		if (err)
			goto fail;

		lock_page(page);
		if (unlikely(page->mapping != mapping)) {
			f2fs_put_page(page, 1);
			goto repeat;
		}
		if (unlikely(!PageUptodate(page))) {
			err = -EIO;
			goto fail;
		}
	}
	return 0;

fail:
	f2fs_put_page(page, 1);
	f2fs_write_failed(mapping, pos + len);
	if (drop_atomic)
		f2fs_drop_inmem_pages_all(sbi, false);
	return err;
}

static int f2fs_write_end(struct file *file,
			struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	struct inode *inode = page->mapping->host;

	trace_f2fs_write_end(inode, pos, len, copied);

	/*
	 * This should come from len == PAGE_SIZE, so we expect copied to be
	 * PAGE_SIZE as well. Otherwise, treat it as zero copied and let
	 * generic_perform_write() try the copy again with copied = 0.
	 */
	if (!PageUptodate(page)) {
		if (unlikely(copied != len))
			copied = 0;
		else
			SetPageUptodate(page);
	}
	if (!copied)
		goto unlock_out;

	set_page_dirty(page);

	if (pos + copied > i_size_read(inode))
		f2fs_i_size_write(inode, pos + copied);
unlock_out:
	f2fs_put_page(page, 1);
	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
	return copied;
}

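/*
 * Returns 0 if the request is fully aligned to the filesystem block size,
 * 1 if it is aligned only to the device's logical block size (the caller
 * then falls back to buffered IO), and -EINVAL otherwise.
 */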
static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
			   loff_t offset)
{
	unsigned i_blkbits = READ_ONCE(inode->i_blkbits);
	unsigned blkbits = i_blkbits;
	unsigned blocksize_mask = (1 << blkbits) - 1;
	unsigned long align = offset | iov_iter_alignment(iter);
	struct block_device *bdev = inode->i_sb->s_bdev;

	if (align & blocksize_mask) {
		if (bdev)
			blkbits = blksize_bits(bdev_logical_block_size(bdev));
		blocksize_mask = (1 << blkbits) - 1;
		if (align & blocksize_mask)
			return -EINVAL;
		return 1;
	}
	return 0;
}

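/*
 * f2fs wraps the completion of direct-IO bios so that it can keep a count
 * of in-flight DIO pages; the original bi_end_io/bi_private are restored
 * before completion is propagated.
 */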
static void f2fs_dio_end_io(struct bio *bio)
{
	struct f2fs_private_dio *dio = bio->bi_private;

	dec_page_count(F2FS_I_SB(dio->inode),
			dio->write ? F2FS_DIO_WRITE : F2FS_DIO_READ);

	bio->bi_private = dio->orig_private;
	bio->bi_end_io = dio->orig_end_io;

	kvfree(dio);

	bio_endio(bio);
}

static void f2fs_dio_submit_bio(struct bio *bio, struct inode *inode,
							loff_t file_offset)
{
	struct f2fs_private_dio *dio;
	bool write = (bio_op(bio) == REQ_OP_WRITE);
	int err;

	dio = f2fs_kzalloc(F2FS_I_SB(inode),
			sizeof(struct f2fs_private_dio), GFP_NOFS);
	if (!dio) {
		err = -ENOMEM;
		goto out;
	}

	dio->inode = inode;
	dio->orig_end_io = bio->bi_end_io;
	dio->orig_private = bio->bi_private;
	dio->write = write;

	bio->bi_end_io = f2fs_dio_end_io;
	bio->bi_private = dio;

	inc_page_count(F2FS_I_SB(inode),
			write ? F2FS_DIO_WRITE : F2FS_DIO_READ);

	submit_bio(bio);
	return;
out:
	bio->bi_status = BLK_STS_IOERR;
	bio_endio(bio);
}

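/*
 * Direct IO entry point. i_gc_rwsem is taken (try-lock only for
 * IOCB_NOWAIT) to exclude GC while the IO is in flight; out-of-place DIO
 * writes additionally take the READ side.
 */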
static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
	struct address_space *mapping = iocb->ki_filp->f_mapping;
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	size_t count = iov_iter_count(iter);
	loff_t offset = iocb->ki_pos;
	int rw = iov_iter_rw(iter);
	int err;
	enum rw_hint hint = iocb->ki_hint;
	int whint_mode = F2FS_OPTION(sbi).whint_mode;
	bool do_opu;

	err = check_direct_IO(inode, iter, offset);
	if (err)
		return err < 0 ? err : 0;

	if (f2fs_force_buffered_io(inode, iocb, iter))
		return 0;

	do_opu = allow_outplace_dio(inode, iocb, iter);

	trace_f2fs_direct_IO_enter(inode, offset, count, rw);

	if (rw == WRITE && whint_mode == WHINT_MODE_OFF)
		iocb->ki_hint = WRITE_LIFE_NOT_SET;

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!down_read_trylock(&fi->i_gc_rwsem[rw])) {
			iocb->ki_hint = hint;
			err = -EAGAIN;
			goto out;
		}
		if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
			up_read(&fi->i_gc_rwsem[rw]);
			iocb->ki_hint = hint;
			err = -EAGAIN;
			goto out;
		}
	} else {
		down_read(&fi->i_gc_rwsem[rw]);
		if (do_opu)
			down_read(&fi->i_gc_rwsem[READ]);
	}

	err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
			iter, rw == WRITE ? get_data_block_dio_write :
			get_data_block_dio, NULL, f2fs_dio_submit_bio,
			DIO_LOCKING | DIO_SKIP_HOLES);

	if (do_opu)
		up_read(&fi->i_gc_rwsem[READ]);

	up_read(&fi->i_gc_rwsem[rw]);

	if (rw == WRITE) {
		if (whint_mode == WHINT_MODE_OFF)
			iocb->ki_hint = hint;
		if (err > 0) {
			f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
									err);
			if (!do_opu)
				set_inode_flag(inode, FI_UPDATE_WRITE);
		} else if (err < 0) {
			f2fs_write_failed(mapping, offset + count);
		}
	}

out:
	trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);

	return err;
}

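/*
 * ->invalidatepage: roll back the dirty accounting for a page that is being
 * removed, and hand atomic-write pages back to the inmem machinery.
 */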
void f2fs_invalidate_page(struct page *page, unsigned int offset,
							unsigned int length)
{
	struct inode *inode = page->mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
		(offset % PAGE_SIZE || length != PAGE_SIZE))
		return;

	if (PageDirty(page)) {
		if (inode->i_ino == F2FS_META_INO(sbi)) {
			dec_page_count(sbi, F2FS_DIRTY_META);
		} else if (inode->i_ino == F2FS_NODE_INO(sbi)) {
			dec_page_count(sbi, F2FS_DIRTY_NODES);
		} else {
			inode_dec_dirty_pages(inode);
			f2fs_remove_dirty_inode(inode);
		}
	}

	clear_cold_data(page);

	/* This is an atomic written page; keep its Private flag */
	if (IS_ATOMIC_WRITTEN_PAGE(page))
		return f2fs_drop_inmem_page(inode, page);

	set_page_private(page, 0);
	ClearPagePrivate(page);
}

int f2fs_release_page(struct page *page, gfp_t wait)
{
	/* If this is a dirty page, keep its PagePrivate */
	if (PageDirty(page))
		return 0;

	/* This is an atomic written page; keep its Private flag */
	if (IS_ATOMIC_WRITTEN_PAGE(page))
		return 0;

	clear_cold_data(page);
	set_page_private(page, 0);
	ClearPagePrivate(page);
	return 1;
}

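/*
 * ->set_page_dirty: pages of an in-progress atomic write are registered in
 * the inmem list instead of going through the normal dirty path.
 */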
static int f2fs_set_data_page_dirty(struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct inode *inode = mapping->host;

	trace_f2fs_set_page_dirty(page, DATA);

	if (!PageUptodate(page))
		SetPageUptodate(page);

	if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) {
		if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
			f2fs_register_inmem_page(inode, page);
			return 1;
		}
		/*
		 * Previously, this page has been registered; we just
		 * return here.
		 */
		return 0;
	}

	if (!PageDirty(page)) {
		__set_page_dirty_nobuffers(page);
		f2fs_update_dirty_page(inode, page);
		return 1;
	}
	return 0;
}

static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
{
	struct inode *inode = mapping->host;

	if (f2fs_has_inline_data(inode))
		return 0;

	/* make sure allocating whole blocks */
	if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
		filemap_write_and_wait(mapping);

	return generic_block_bmap(mapping, block, get_data_block_bmap);
}

#ifdef CONFIG_MIGRATION
#include <linux/migrate.h>

int f2fs_migrate_page(struct address_space *mapping,
		struct page *newpage, struct page *page, enum migrate_mode mode)
{
	int rc, extra_count;
	struct f2fs_inode_info *fi = F2FS_I(mapping->host);
	bool atomic_written = IS_ATOMIC_WRITTEN_PAGE(page);

	BUG_ON(PageWriteback(page));

	/* migrating an atomic written page is safe with the inmem_lock held */
	if (atomic_written) {
		if (mode != MIGRATE_SYNC)
			return -EBUSY;
		if (!mutex_trylock(&fi->inmem_lock))
			return -EAGAIN;
	}

	/*
	 * A reference is expected if PagePrivate is set when moving a mapping;
	 * however, f2fs breaks this rule to maintain its dirty page counts
	 * when truncating pages. Adjusting 'extra_count' here makes it work.
	 */
	extra_count = (atomic_written ? 1 : 0) - page_has_private(page);
	rc = migrate_page_move_mapping(mapping, newpage,
				page, mode, extra_count);
	if (rc != MIGRATEPAGE_SUCCESS) {
		if (atomic_written)
			mutex_unlock(&fi->inmem_lock);
		return rc;
	}

	if (atomic_written) {
		struct inmem_pages *cur;
		list_for_each_entry(cur, &fi->inmem_pages, list)
			if (cur->page == page) {
				cur->page = newpage;
				break;
			}
		mutex_unlock(&fi->inmem_lock);
		put_page(page);
		get_page(newpage);
	}

	if (PagePrivate(page))
		SetPagePrivate(newpage);
	set_page_private(newpage, page_private(page));

	if (mode != MIGRATE_SYNC_NO_COPY)
		migrate_page_copy(newpage, page);
	else
		migrate_page_states(newpage, page);

	return MIGRATEPAGE_SUCCESS;
}
#endif

const struct address_space_operations f2fs_dblock_aops = {
	.readpage	= f2fs_read_data_page,
	.readpages	= f2fs_read_data_pages,
	.writepage	= f2fs_write_data_page,
	.writepages	= f2fs_write_data_pages,
	.write_begin	= f2fs_write_begin,
	.write_end	= f2fs_write_end,
	.set_page_dirty	= f2fs_set_data_page_dirty,
	.invalidatepage	= f2fs_invalidate_page,
	.releasepage	= f2fs_release_page,
	.direct_IO	= f2fs_direct_IO,
	.bmap		= f2fs_bmap,
#ifdef CONFIG_MIGRATION
	.migratepage    = f2fs_migrate_page,
#endif
};

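/*
 * Clear only the PAGECACHE_TAG_DIRTY tag in the pagecache xarray, without
 * touching the page flags themselves.
 */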
void f2fs_clear_page_cache_dirty_tag(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	unsigned long flags;

	xa_lock_irqsave(&mapping->i_pages, flags);
	__xa_clear_mark(&mapping->i_pages, page_index(page),
						PAGECACHE_TAG_DIRTY);
	xa_unlock_irqrestore(&mapping->i_pages, flags);
}

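/*
 * Editorial note: the mempool below is what guarantees forward progress for
 * post-read processing (e.g. decryption) under memory pressure, once the
 * slab cache and pool have been allocated at module init.
 */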
int __init f2fs_init_post_read_processing(void)
{
	bio_post_read_ctx_cache = KMEM_CACHE(bio_post_read_ctx, 0);
	if (!bio_post_read_ctx_cache)
		goto fail;
	bio_post_read_ctx_pool =
		mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS,
					 bio_post_read_ctx_cache);
	if (!bio_post_read_ctx_pool)
		goto fail_free_cache;
	return 0;

fail_free_cache:
	kmem_cache_destroy(bio_post_read_ctx_cache);
fail:
	return -ENOMEM;
}

void __exit f2fs_destroy_post_read_processing(void)
{
	mempool_destroy(bio_post_read_ctx_pool);
	kmem_cache_destroy(bio_post_read_ctx_cache);
}