// SPDX-License-Identifier: GPL-2.0
/*
 * fs/f2fs/data.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/swap.h>
#include <linux/prefetch.h>
#include <linux/uio.h>
#include <linux/cleancache.h>
#include <linux/sched/signal.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "trace.h"
#include <trace/events/f2fs.h>

#define NUM_PREALLOC_POST_READ_CTXS	128

static struct kmem_cache *bio_post_read_ctx_cache;
static mempool_t *bio_post_read_ctx_pool;

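/*
 * Writeback of meta/node pages, directories, atomic or quota files and
 * cold data must complete before a checkpoint commits; such pages are
 * accounted as F2FS_WB_CP_DATA instead of F2FS_WB_DATA.
 */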
static bool __is_cp_guaranteed(struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct inode *inode;
	struct f2fs_sb_info *sbi;

	if (!mapping)
		return false;

	inode = mapping->host;
	sbi = F2FS_I_SB(inode);

	if (inode->i_ino == F2FS_META_INO(sbi) ||
			inode->i_ino ==  F2FS_NODE_INO(sbi) ||
			S_ISDIR(inode->i_mode) ||
			(S_ISREG(inode->i_mode) &&
			(f2fs_is_atomic_file(inode) || IS_NOQUOTA(inode))) ||
			is_cold_data(page))
		return true;
	return false;
}

static enum count_type __read_io_type(struct page *page)
{
	struct address_space *mapping = page_file_mapping(page);

	if (mapping) {
		struct inode *inode = mapping->host;
		struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

		if (inode->i_ino == F2FS_META_INO(sbi))
			return F2FS_RD_META;

		if (inode->i_ino == F2FS_NODE_INO(sbi))
			return F2FS_RD_NODE;
	}
	return F2FS_RD_DATA;
}

/* postprocessing steps for read bios */
enum bio_post_read_step {
	STEP_INITIAL = 0,
	STEP_DECRYPT,
};

struct bio_post_read_ctx {
	struct bio *bio;
	struct work_struct work;
	unsigned int cur_step;
	unsigned int enabled_steps;
};

static void __read_end_io(struct bio *bio)
{
	struct page *page;
	struct bio_vec *bv;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bv, bio, iter_all) {
		page = bv->bv_page;

		/* PG_error was set if any post_read step failed */
		if (bio->bi_status || PageError(page)) {
			ClearPageUptodate(page);
			/* will re-read again later */
			ClearPageError(page);
		} else {
			SetPageUptodate(page);
		}
		dec_page_count(F2FS_P_SB(page), __read_io_type(page));
		unlock_page(page);
	}
	if (bio->bi_private)
		mempool_free(bio->bi_private, bio_post_read_ctx_pool);
	bio_put(bio);
}

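/*
 * bio_post_read_processing() steps through ctx->enabled_steps starting at
 * STEP_INITIAL; decryption is deferred to the fscrypt workqueue, and the
 * final step hands the bio to __read_end_io().
 */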
static void bio_post_read_processing(struct bio_post_read_ctx *ctx);

static void decrypt_work(struct work_struct *work)
{
	struct bio_post_read_ctx *ctx =
		container_of(work, struct bio_post_read_ctx, work);

	fscrypt_decrypt_bio(ctx->bio);

	bio_post_read_processing(ctx);
}

static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
{
	switch (++ctx->cur_step) {
	case STEP_DECRYPT:
		if (ctx->enabled_steps & (1 << STEP_DECRYPT)) {
			INIT_WORK(&ctx->work, decrypt_work);
			fscrypt_enqueue_decrypt_work(&ctx->work);
			return;
		}
		ctx->cur_step++;
		/* fall-through */
	default:
		__read_end_io(ctx->bio);
	}
}

static bool f2fs_bio_post_read_required(struct bio *bio)
{
	return bio->bi_private && !bio->bi_status;
}

static void f2fs_read_end_io(struct bio *bio)
{
	if (time_to_inject(F2FS_P_SB(bio_first_page_all(bio)),
						FAULT_READ_IO)) {
		f2fs_show_injection_info(FAULT_READ_IO);
		bio->bi_status = BLK_STS_IOERR;
	}

	if (f2fs_bio_post_read_required(bio)) {
		struct bio_post_read_ctx *ctx = bio->bi_private;

		ctx->cur_step = STEP_INITIAL;
		bio_post_read_processing(ctx);
		return;
	}

	__read_end_io(bio);
}

static void f2fs_write_end_io(struct bio *bio)
{
	struct f2fs_sb_info *sbi = bio->bi_private;
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	if (time_to_inject(sbi, FAULT_WRITE_IO)) {
		f2fs_show_injection_info(FAULT_WRITE_IO);
		bio->bi_status = BLK_STS_IOERR;
	}

	bio_for_each_segment_all(bvec, bio, iter_all) {
		struct page *page = bvec->bv_page;
		enum count_type type = WB_DATA_TYPE(page);

		if (IS_DUMMY_WRITTEN_PAGE(page)) {
			set_page_private(page, (unsigned long)NULL);
			ClearPagePrivate(page);
			unlock_page(page);
			mempool_free(page, sbi->write_io_dummy);

			if (unlikely(bio->bi_status))
				f2fs_stop_checkpoint(sbi, true);
			continue;
		}

		fscrypt_finalize_bounce_page(&page);

		if (unlikely(bio->bi_status)) {
			mapping_set_error(page->mapping, -EIO);
			if (type == F2FS_WB_CP_DATA)
				f2fs_stop_checkpoint(sbi, true);
		}

		f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) &&
					page->index != nid_of_node(page));

		dec_page_count(sbi, type);
		if (f2fs_in_warm_node_list(sbi, page))
			f2fs_del_fsync_node_entry(sbi, page);
		clear_cold_data(page);
		end_page_writeback(page);
	}
	if (!get_pages(sbi, F2FS_WB_CP_DATA) &&
				wq_has_sleeper(&sbi->cp_wait))
		wake_up(&sbi->cp_wait);

	bio_put(bio);
}

/*
 * Return the block device that blk_addr belongs to; for multi-device
 * layouts, remap blk_addr to the device-local address and, if a bio is
 * given, point it at that device.
 */
struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
				block_t blk_addr, struct bio *bio)
{
	struct block_device *bdev = sbi->sb->s_bdev;
	int i;

	if (f2fs_is_multi_device(sbi)) {
		for (i = 0; i < sbi->s_ndevs; i++) {
			if (FDEV(i).start_blk <= blk_addr &&
			    FDEV(i).end_blk >= blk_addr) {
				blk_addr -= FDEV(i).start_blk;
				bdev = FDEV(i).bdev;
				break;
			}
		}
	}
	if (bio) {
		bio_set_dev(bio, bdev);
		bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
	}
	return bdev;
}

int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	int i;

	if (!f2fs_is_multi_device(sbi))
		return 0;

	for (i = 0; i < sbi->s_ndevs; i++)
		if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
			return i;
	return 0;
}

static bool __same_bdev(struct f2fs_sb_info *sbi,
				block_t blk_addr, struct bio *bio)
{
	struct block_device *b = f2fs_target_device(sbi, blk_addr, NULL);
	return bio->bi_disk == b->bd_disk && bio->bi_partno == b->bd_partno;
}

/*
 * Low-level block read/write IO operations.
 */
static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	struct bio *bio;

	bio = f2fs_bio_alloc(sbi, npages, true);

	f2fs_target_device(sbi, fio->new_blkaddr, bio);
	if (is_read_io(fio->op)) {
		bio->bi_end_io = f2fs_read_end_io;
		bio->bi_private = NULL;
	} else {
		bio->bi_end_io = f2fs_write_end_io;
		bio->bi_private = sbi;
		bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi,
						fio->type, fio->temp);
	}
	if (fio->io_wbc)
		wbc_init_bio(fio->io_wbc, bio);

	return bio;
}

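/*
 * With an aligned IO size (LFS mode), a partially filled write bio is
 * padded out to an F2FS_IO_SIZE boundary with locked dummy pages, which
 * f2fs_write_end_io() later recognizes and frees via IS_DUMMY_WRITTEN_PAGE.
 */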
static inline void __submit_bio(struct f2fs_sb_info *sbi,
				struct bio *bio, enum page_type type)
{
	if (!is_read_io(bio_op(bio))) {
		unsigned int start;

		if (type != DATA && type != NODE)
			goto submit_io;

		if (test_opt(sbi, LFS) && current->plug)
			blk_finish_plug(current->plug);

		start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
		start %= F2FS_IO_SIZE(sbi);

		if (start == 0)
			goto submit_io;

		/* fill dummy pages */
		for (; start < F2FS_IO_SIZE(sbi); start++) {
			struct page *page =
				mempool_alloc(sbi->write_io_dummy,
					      GFP_NOIO | __GFP_NOFAIL);
			f2fs_bug_on(sbi, !page);

			zero_user_segment(page, 0, PAGE_SIZE);
			SetPagePrivate(page);
			set_page_private(page, (unsigned long)DUMMY_WRITTEN_PAGE);
			lock_page(page);
			if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
				f2fs_bug_on(sbi, 1);
		}
		/*
		 * In the NODE case, we lose next block address chain. So, we
		 * need to do checkpoint in f2fs_sync_file.
		 */
		if (type == NODE)
			set_sbi_flag(sbi, SBI_NEED_CP);
	}
submit_io:
	if (is_read_io(bio_op(bio)))
		trace_f2fs_submit_read_bio(sbi->sb, type, bio);
	else
		trace_f2fs_submit_write_bio(sbi->sb, type, bio);
	submit_bio(bio);
}

static void __submit_merged_bio(struct f2fs_bio_info *io)
{
	struct f2fs_io_info *fio = &io->fio;

	if (!io->bio)
		return;

	bio_set_op_attrs(io->bio, fio->op, fio->op_flags);

	if (is_read_io(fio->op))
		trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
	else
		trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio);

	__submit_bio(io->sbi, io->bio, fio->type);
	io->bio = NULL;
}

static bool __has_merged_page(struct bio *bio, struct inode *inode,
						struct page *page, nid_t ino)
{
	struct bio_vec *bvec;
	struct page *target;
	struct bvec_iter_all iter_all;

	if (!bio)
		return false;

	if (!inode && !page && !ino)
		return true;

	bio_for_each_segment_all(bvec, bio, iter_all) {

		target = bvec->bv_page;
		if (fscrypt_is_bounce_page(target))
			target = fscrypt_pagecache_page(target);

		if (inode && inode == target->mapping->host)
			return true;
		if (page && page == target)
			return true;
		if (ino && ino == ino_of_node(target))
			return true;
	}

	return false;
}

static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
				enum page_type type, enum temp_type temp)
{
	enum page_type btype = PAGE_TYPE_OF_BIO(type);
	struct f2fs_bio_info *io = sbi->write_io[btype] + temp;

	down_write(&io->io_rwsem);

	/* change META to META_FLUSH in the checkpoint procedure */
	if (type >= META_FLUSH) {
		io->fio.type = META_FLUSH;
		io->fio.op = REQ_OP_WRITE;
		io->fio.op_flags = REQ_META | REQ_PRIO | REQ_SYNC;
		if (!test_opt(sbi, NOBARRIER))
			io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
	}
	__submit_merged_bio(io);
	up_write(&io->io_rwsem);
}

static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
				struct inode *inode, struct page *page,
				nid_t ino, enum page_type type, bool force)
{
	enum temp_type temp;
	bool ret = true;

	for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
		if (!force)	{
			enum page_type btype = PAGE_TYPE_OF_BIO(type);
			struct f2fs_bio_info *io = sbi->write_io[btype] + temp;

			down_read(&io->io_rwsem);
			ret = __has_merged_page(io->bio, inode, page, ino);
			up_read(&io->io_rwsem);
		}
		if (ret)
			__f2fs_submit_merged_write(sbi, type, temp);

		/* TODO: use HOT temp only for meta pages now. */
		if (type >= META)
			break;
	}
}

void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
{
	__submit_merged_write_cond(sbi, NULL, NULL, 0, type, true);
}

void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
				struct inode *inode, struct page *page,
				nid_t ino, enum page_type type)
{
	__submit_merged_write_cond(sbi, inode, page, ino, type, false);
}

void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
{
	f2fs_submit_merged_write(sbi, DATA);
	f2fs_submit_merged_write(sbi, NODE);
	f2fs_submit_merged_write(sbi, META);
}

/*
 * Fill the locked page with data located in the block address.
 * A caller needs to unlock the page on failure.
 */
int f2fs_submit_page_bio(struct f2fs_io_info *fio)
{
	struct bio *bio;
	struct page *page = fio->encrypted_page ?
			fio->encrypted_page : fio->page;

	if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
			fio->is_por ? META_POR : (__is_meta_io(fio) ?
			META_GENERIC : DATA_GENERIC_ENHANCE)))
		return -EFSCORRUPTED;

	trace_f2fs_submit_page_bio(page, fio);
	f2fs_trace_ios(fio, 0);

	/* Allocate a new bio */
	bio = __bio_alloc(fio, 1);

	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
		bio_put(bio);
		return -EFAULT;
	}

	if (fio->io_wbc && !is_read_io(fio->op))
		wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE);

	bio_set_op_attrs(bio, fio->op, fio->op_flags);

	inc_page_count(fio->sbi, is_read_io(fio->op) ?
			__read_io_type(page): WB_DATA_TYPE(fio->page));

	__submit_bio(fio->sbi, bio, fio->type);
	return 0;
}

static bool page_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
				block_t last_blkaddr, block_t cur_blkaddr)
{
	if (last_blkaddr + 1 != cur_blkaddr)
		return false;
	return __same_bdev(sbi, cur_blkaddr, bio);
}

static bool io_type_is_mergeable(struct f2fs_bio_info *io,
						struct f2fs_io_info *fio)
{
	if (io->fio.op != fio->op)
		return false;
	return io->fio.op_flags == fio->op_flags;
}

static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
					struct f2fs_bio_info *io,
					struct f2fs_io_info *fio,
					block_t last_blkaddr,
					block_t cur_blkaddr)
{
	if (F2FS_IO_ALIGNED(sbi) && (fio->type == DATA || fio->type == NODE)) {
		unsigned int filled_blocks =
				F2FS_BYTES_TO_BLK(bio->bi_iter.bi_size);
		unsigned int io_size = F2FS_IO_SIZE(sbi);
		unsigned int left_vecs = bio->bi_max_vecs - bio->bi_vcnt;

		/* IO in the bio is aligned, but too few vectors remain for one unit */
		if (!(filled_blocks % io_size) && left_vecs < io_size)
			return false;
	}
	if (!page_is_mergeable(sbi, bio, last_blkaddr, cur_blkaddr))
		return false;
	return io_type_is_mergeable(io, fio);
}

int f2fs_merge_page_bio(struct f2fs_io_info *fio)
{
	struct bio *bio = *fio->bio;
	struct page *page = fio->encrypted_page ?
			fio->encrypted_page : fio->page;

	if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
			__is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
		return -EFSCORRUPTED;

	trace_f2fs_submit_page_bio(page, fio);
	f2fs_trace_ios(fio, 0);

	if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block,
						fio->new_blkaddr)) {
		__submit_bio(fio->sbi, bio, fio->type);
		bio = NULL;
	}
alloc_new:
	if (!bio) {
		bio = __bio_alloc(fio, BIO_MAX_PAGES);
		bio_set_op_attrs(bio, fio->op, fio->op_flags);
	}

	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
		__submit_bio(fio->sbi, bio, fio->type);
		bio = NULL;
		goto alloc_new;
	}

	if (fio->io_wbc)
		wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE);

	inc_page_count(fio->sbi, WB_DATA_TYPE(page));

	*fio->last_block = fio->new_blkaddr;
	*fio->bio = bio;

	return 0;
}

static void f2fs_submit_ipu_bio(struct f2fs_sb_info *sbi, struct bio **bio,
							struct page *page)
{
	if (!bio)
		return;

	if (!__has_merged_page(*bio, NULL, page, 0))
		return;

	__submit_bio(sbi, *bio, DATA);
	*bio = NULL;
}

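/*
 * Merged write path: pages of one (page type, temperature) stream are
 * appended to a shared bio while their block addresses stay contiguous;
 * io->io_rwsem serializes this against the flush helpers above.
 */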
void f2fs_submit_page_write(struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
	struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
	struct page *bio_page;

	f2fs_bug_on(sbi, is_read_io(fio->op));

	down_write(&io->io_rwsem);
next:
	if (fio->in_list) {
		spin_lock(&io->io_lock);
		if (list_empty(&io->io_list)) {
			spin_unlock(&io->io_lock);
			goto out;
		}
		fio = list_first_entry(&io->io_list,
						struct f2fs_io_info, list);
		list_del(&fio->list);
		spin_unlock(&io->io_lock);
	}

	verify_fio_blkaddr(fio);

	bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;

	/* set submitted = true as a return value */
	fio->submitted = true;

	inc_page_count(sbi, WB_DATA_TYPE(bio_page));

	if (io->bio && !io_is_mergeable(sbi, io->bio, io, fio,
			io->last_block_in_bio, fio->new_blkaddr))
		__submit_merged_bio(io);
alloc_new:
	if (io->bio == NULL) {
		if ((fio->type == DATA || fio->type == NODE) &&
				fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
			dec_page_count(sbi, WB_DATA_TYPE(bio_page));
			fio->retry = true;
			goto skip;
		}
		io->bio = __bio_alloc(fio, BIO_MAX_PAGES);
		io->fio = *fio;
	}

	if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) {
		__submit_merged_bio(io);
		goto alloc_new;
	}

	if (fio->io_wbc)
		wbc_account_cgroup_owner(fio->io_wbc, bio_page, PAGE_SIZE);

	io->last_block_in_bio = fio->new_blkaddr;
	f2fs_trace_ios(fio, 0);

	trace_f2fs_submit_page_write(fio->page, fio);
skip:
	if (fio->in_list)
		goto next;
out:
	if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
				!f2fs_is_checkpoint_ready(sbi))
		__submit_merged_bio(io);
	up_write(&io->io_rwsem);
}

static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
					unsigned nr_pages, unsigned op_flag)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct bio *bio;
	struct bio_post_read_ctx *ctx;
	unsigned int post_read_steps = 0;

	bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false);
	if (!bio)
		return ERR_PTR(-ENOMEM);
	f2fs_target_device(sbi, blkaddr, bio);
	bio->bi_end_io = f2fs_read_end_io;
	bio_set_op_attrs(bio, REQ_OP_READ, op_flag);

	if (f2fs_encrypted_file(inode))
		post_read_steps |= 1 << STEP_DECRYPT;
	if (post_read_steps) {
		ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
		if (!ctx) {
			bio_put(bio);
			return ERR_PTR(-ENOMEM);
		}
		ctx->bio = bio;
		ctx->enabled_steps = post_read_steps;
		bio->bi_private = ctx;
	}

	return bio;
}

/* This can handle encrypted files */
static int f2fs_submit_page_read(struct inode *inode, struct page *page,
							block_t blkaddr)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct bio *bio;

	bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0);
	if (IS_ERR(bio))
		return PTR_ERR(bio);

	/* wait for GCed page writeback via META_MAPPING */
	f2fs_wait_on_block_writeback(inode, blkaddr);

	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
		bio_put(bio);
		return -EFAULT;
	}
	ClearPageError(page);
	inc_page_count(sbi, F2FS_RD_DATA);
	__submit_bio(sbi, bio, DATA);
	return 0;
}

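/*
 * Block addresses are stored in the owning node page's address array; for
 * inodes carrying the extra-attribute area, slot 0 is shifted by
 * get_extra_isize().
 */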
static void __set_data_blkaddr(struct dnode_of_data *dn)
{
	struct f2fs_node *rn = F2FS_NODE(dn->node_page);
	__le32 *addr_array;
	int base = 0;

	if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode))
		base = get_extra_isize(dn->inode);

	/* Get physical address of data block */
	addr_array = blkaddr_in_node(rn);
	addr_array[base + dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
}

/*
 * Lock ordering for the change of data block address:
 * ->data_page
 *  ->node_page
 *    update block addresses in the node page
 */
void f2fs_set_data_blkaddr(struct dnode_of_data *dn)
{
	f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
	__set_data_blkaddr(dn);
	if (set_page_dirty(dn->node_page))
		dn->node_changed = true;
}

void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
{
	dn->data_blkaddr = blkaddr;
	f2fs_set_data_blkaddr(dn);
	f2fs_update_extent_cache(dn);
}

/* dn->ofs_in_node will be returned with up-to-date last block pointer */
int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	int err;

	if (!count)
		return 0;

	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
		return -EPERM;
	if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
		return err;

	trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
						dn->ofs_in_node, count);

	f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);

	for (; count > 0; dn->ofs_in_node++) {
		block_t blkaddr = datablock_addr(dn->inode,
					dn->node_page, dn->ofs_in_node);
		if (blkaddr == NULL_ADDR) {
			dn->data_blkaddr = NEW_ADDR;
			__set_data_blkaddr(dn);
			count--;
		}
	}

	if (set_page_dirty(dn->node_page))
		dn->node_changed = true;
	return 0;
}

/* Should keep dn->ofs_in_node unchanged */
int f2fs_reserve_new_block(struct dnode_of_data *dn)
{
	unsigned int ofs_in_node = dn->ofs_in_node;
	int ret;

	ret = f2fs_reserve_new_blocks(dn, 1);
	dn->ofs_in_node = ofs_in_node;
	return ret;
}

int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
{
	bool need_put = dn->inode_page ? false : true;
	int err;

	err = f2fs_get_dnode_of_data(dn, index, ALLOC_NODE);
	if (err)
		return err;

	if (dn->data_blkaddr == NULL_ADDR)
		err = f2fs_reserve_new_block(dn);
	if (err || need_put)
		f2fs_put_dnode(dn);
	return err;
}

int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
{
	struct extent_info ei  = {0,0,0};
	struct inode *inode = dn->inode;

	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
		dn->data_blkaddr = ei.blk + index - ei.fofs;
		return 0;
	}

	return f2fs_reserve_block(dn, index);
}

struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
						int op_flags, bool for_write)
{
	struct address_space *mapping = inode->i_mapping;
	struct dnode_of_data dn;
	struct page *page;
	struct extent_info ei = {0,0,0};
	int err;

	page = f2fs_grab_cache_page(mapping, index, for_write);
	if (!page)
		return ERR_PTR(-ENOMEM);

	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
		dn.data_blkaddr = ei.blk + index - ei.fofs;
		if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr,
						DATA_GENERIC_ENHANCE_READ)) {
			err = -EFSCORRUPTED;
			goto put_err;
		}
		goto got_it;
	}

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
	if (err)
		goto put_err;
	f2fs_put_dnode(&dn);

	if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
		err = -ENOENT;
		goto put_err;
	}
	if (dn.data_blkaddr != NEW_ADDR &&
			!f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
						dn.data_blkaddr,
						DATA_GENERIC_ENHANCE)) {
		err = -EFSCORRUPTED;
		goto put_err;
	}
got_it:
	if (PageUptodate(page)) {
		unlock_page(page);
		return page;
	}

	/*
	 * A new dentry page was allocated but could not be written, because
	 * its new inode page couldn't be allocated due to -ENOSPC.
	 * In that case, its blkaddr may remain NEW_ADDR;
	 * see f2fs_add_link -> f2fs_get_new_data_page ->
	 * f2fs_init_inode_metadata.
	 */
	if (dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_SIZE);
		if (!PageUptodate(page))
			SetPageUptodate(page);
		unlock_page(page);
		return page;
	}

	err = f2fs_submit_page_read(inode, page, dn.data_blkaddr);
	if (err)
		goto put_err;
	return page;

put_err:
	f2fs_put_page(page, 1);
	return ERR_PTR(err);
}

struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;

	page = find_get_page(mapping, index);
	if (page && PageUptodate(page))
		return page;
	f2fs_put_page(page, 0);

	page = f2fs_get_read_data_page(inode, index, 0, false);
		return page;

	if (PageUptodate(page))
		return page;

	wait_on_page_locked(page);
	if (unlikely(!PageUptodate(page))) {
		f2fs_put_page(page, 0);
		return ERR_PTR(-EIO);
	}
	return page;
}

/*
 * If it tries to access a hole, return an error.
 * Because, the callers, functions in dir.c and GC, should be able to know
 * whether this page exists or not.
 */
struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
							bool for_write)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
repeat:
	page = f2fs_get_read_data_page(inode, index, 0, for_write);
	if (IS_ERR(page))
		return page;

	/* wait for read completion */
	lock_page(page);
	if (unlikely(page->mapping != mapping)) {
		f2fs_put_page(page, 1);
		goto repeat;
	}
	if (unlikely(!PageUptodate(page))) {
		f2fs_put_page(page, 1);
		return ERR_PTR(-EIO);
	}
	return page;
}

/*
 * Caller ensures that this data page is never allocated.
 * A new zero-filled data page is allocated in the page cache.
 *
 * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
 * f2fs_unlock_op().
 * Note that ipage is set only by make_empty_dir, and if any error occurs,
 * ipage should be released by this function.
 */
struct page *f2fs_get_new_data_page(struct inode *inode,
		struct page *ipage, pgoff_t index, bool new_i_size)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
	struct dnode_of_data dn;
	int err;

	page = f2fs_grab_cache_page(mapping, index, true);
	if (!page) {
		/*
		 * before exiting, we should make sure ipage will be released
		 * if any error occurs.
		 */
		f2fs_put_page(ipage, 1);
		return ERR_PTR(-ENOMEM);
	}

	set_new_dnode(&dn, inode, ipage, NULL, 0);
	err = f2fs_reserve_block(&dn, index);
	if (err) {
		f2fs_put_page(page, 1);
		return ERR_PTR(err);
	}
	if (!ipage)
		f2fs_put_dnode(&dn);

	if (PageUptodate(page))
		goto got_it;

	if (dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_SIZE);
		if (!PageUptodate(page))
			SetPageUptodate(page);
	} else {
		f2fs_put_page(page, 1);

		/* if ipage exists, blkaddr should be NEW_ADDR */
		f2fs_bug_on(F2FS_I_SB(inode), ipage);
		page = f2fs_get_lock_data_page(inode, index, true);
		if (IS_ERR(page))
			return page;
	}
got_it:
	if (new_i_size && i_size_read(inode) <
				((loff_t)(index + 1) << PAGE_SHIFT))
		f2fs_i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT));
	return page;
}

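/*
 * Allocate (or replace) one data block for dn: pick a new address via the
 * segment allocator, drop any stale copy of the old block cached in
 * META_MAPPING, then record the new address in the node page.
 */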
static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	struct f2fs_summary sum;
	struct node_info ni;
	block_t old_blkaddr;
	blkcnt_t count = 1;
	int err;

	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
		return -EPERM;

	err = f2fs_get_node_info(sbi, dn->nid, &ni);
	if (err)
		return err;

	dn->data_blkaddr = datablock_addr(dn->inode,
				dn->node_page, dn->ofs_in_node);
	if (dn->data_blkaddr != NULL_ADDR)
		goto alloc;

	if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
		return err;

alloc:
	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
	old_blkaddr = dn->data_blkaddr;
	f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
					&sum, seg_type, NULL, false);
	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
		invalidate_mapping_pages(META_MAPPING(sbi),
					old_blkaddr, old_blkaddr);
	f2fs_set_data_blkaddr(dn);

	/*
	 * i_size will be updated by direct_IO. Otherwise, we'll get stale
	 * data from unwritten block via dio_read.
	 */
	return 0;
}

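/*
 * Preallocate the blocks a pending write will touch; note that -ENOSPC
 * from f2fs_map_blocks() is deliberately swallowed so the write path can
 * fall back to per-page allocation.
 */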
int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct f2fs_map_blocks map;
	int flag;
	int err = 0;
	bool direct_io = iocb->ki_flags & IOCB_DIRECT;

	/* convert inline data for Direct I/O */
	if (direct_io) {
		err = f2fs_convert_inline_inode(inode);
		if (err)
			return err;
	}

	if (direct_io && allow_outplace_dio(inode, iocb, from))
		return 0;

	if (is_inode_flag_set(inode, FI_NO_PREALLOC))
		return 0;

	map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
	map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from));
	if (map.m_len > map.m_lblk)
		map.m_len -= map.m_lblk;
	else
		map.m_len = 0;

	map.m_next_pgofs = NULL;
	map.m_next_extent = NULL;
	map.m_seg_type = NO_CHECK_TYPE;
	map.m_may_create = true;

	if (direct_io) {
		map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint);
		flag = f2fs_force_buffered_io(inode, iocb, from) ?
					F2FS_GET_BLOCK_PRE_AIO :
					F2FS_GET_BLOCK_PRE_DIO;
		goto map_blocks;
	}
	if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) {
		err = f2fs_convert_inline_inode(inode);
		if (err)
			return err;
	}
	if (f2fs_has_inline_data(inode))
		return err;

	flag = F2FS_GET_BLOCK_PRE_AIO;

map_blocks:
	err = f2fs_map_blocks(inode, &map, 1, flag);
	if (map.m_len > 0 && err == -ENOSPC) {
		if (!direct_io)
			set_inode_flag(inode, FI_NO_PREALLOC);
		err = 0;
	}
	return err;
}

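/*
 * F2FS_GET_BLOCK_PRE_AIO only needs to fence node-page changes, so it
 * takes the lighter node_change rwsem; all other flags take the full
 * f2fs_lock_op() against checkpoint.
 */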
void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
{
	if (flag == F2FS_GET_BLOCK_PRE_AIO) {
		if (lock)
			down_read(&sbi->node_change);
		else
			up_read(&sbi->node_change);
	} else {
		if (lock)
			f2fs_lock_op(sbi);
		else
			f2fs_unlock_op(sbi);
	}
}

/*
 * f2fs_map_blocks() now supports readahead/bmap/rw direct_IO via the
 * f2fs_map_blocks structure.
 * If original data blocks are allocated, then give them to blockdev.
 * Otherwise,
 *     a. preallocate requested block addresses
 *     b. do not use extent cache for better performance
 *     c. give the block addresses to blockdev
 */
int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
						int create, int flag)
{
	unsigned int maxblocks = map->m_len;
	struct dnode_of_data dn;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	int mode = map->m_may_create ? ALLOC_NODE : LOOKUP_NODE;
	pgoff_t pgofs, end_offset, end;
	int err = 0, ofs = 1;
	unsigned int ofs_in_node, last_ofs_in_node;
	blkcnt_t prealloc;
	struct extent_info ei = {0,0,0};
	block_t blkaddr;
	unsigned int start_pgofs;

	if (!maxblocks)
		return 0;

	map->m_len = 0;
	map->m_flags = 0;

	/* it only supports block size == page size */
	pgofs =	(pgoff_t)map->m_lblk;
	end = pgofs + maxblocks;

	if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
		if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO &&
							map->m_may_create)
			goto next_dnode;

		map->m_pblk = ei.blk + pgofs - ei.fofs;
		map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
		map->m_flags = F2FS_MAP_MAPPED;
		if (map->m_next_extent)
			*map->m_next_extent = pgofs + map->m_len;

		/* for hardware encryption, but to avoid potential issue in future */
		if (flag == F2FS_GET_BLOCK_DIO)
			f2fs_wait_on_block_writeback_range(inode,
						map->m_pblk, map->m_len);
		goto out;
	}

next_dnode:
	if (map->m_may_create)
		__do_map_lock(sbi, flag, true);

	/* When reading holes, we need its node page */
	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_get_dnode_of_data(&dn, pgofs, mode);
	if (err) {
		if (flag == F2FS_GET_BLOCK_BMAP)
			map->m_pblk = 0;
		if (err == -ENOENT) {
			err = 0;
			if (map->m_next_pgofs)
				*map->m_next_pgofs =
					f2fs_get_next_page_offset(&dn, pgofs);
			if (map->m_next_extent)
				*map->m_next_extent =
					f2fs_get_next_page_offset(&dn, pgofs);
		}
		goto unlock_out;
	}

	start_pgofs = pgofs;
	prealloc = 0;
	last_ofs_in_node = ofs_in_node = dn.ofs_in_node;
	end_offset = ADDRS_PER_PAGE(dn.node_page, inode);

next_block:
	blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node);

	if (__is_valid_data_blkaddr(blkaddr) &&
		!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
		err = -EFSCORRUPTED;
		goto sync_out;
	}

	if (__is_valid_data_blkaddr(blkaddr)) {
		/* use out-place-update for direct IO under LFS mode */
		if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO &&
							map->m_may_create) {
			err = __allocate_data_block(&dn, map->m_seg_type);
			if (!err) {
				blkaddr = dn.data_blkaddr;
				set_inode_flag(inode, FI_APPEND_WRITE);
			}
		}
	} else {
		if (create) {
			if (unlikely(f2fs_cp_error(sbi))) {
				err = -EIO;
				goto sync_out;
			}
			if (flag == F2FS_GET_BLOCK_PRE_AIO) {
				if (blkaddr == NULL_ADDR) {
					prealloc++;
					last_ofs_in_node = dn.ofs_in_node;
				}
			} else {
				WARN_ON(flag != F2FS_GET_BLOCK_PRE_DIO &&
					flag != F2FS_GET_BLOCK_DIO);
				err = __allocate_data_block(&dn,
							map->m_seg_type);
				if (!err)
					set_inode_flag(inode, FI_APPEND_WRITE);
			}
			if (err)
				goto sync_out;
			map->m_flags |= F2FS_MAP_NEW;
			blkaddr = dn.data_blkaddr;
		} else {
			if (flag == F2FS_GET_BLOCK_BMAP) {
				map->m_pblk = 0;
				goto sync_out;
			}
			if (flag == F2FS_GET_BLOCK_PRECACHE)
				goto sync_out;
			if (flag == F2FS_GET_BLOCK_FIEMAP &&
						blkaddr == NULL_ADDR) {
				if (map->m_next_pgofs)
					*map->m_next_pgofs = pgofs + 1;
				goto sync_out;
			}
			if (flag != F2FS_GET_BLOCK_FIEMAP) {
				/* for defragment case */
				if (map->m_next_pgofs)
					*map->m_next_pgofs = pgofs + 1;
				goto sync_out;
			}
		}
	}

	if (flag == F2FS_GET_BLOCK_PRE_AIO)
		goto skip;

	if (map->m_len == 0) {
		/* preallocated unwritten block should be mapped for fiemap. */
		if (blkaddr == NEW_ADDR)
			map->m_flags |= F2FS_MAP_UNWRITTEN;
		map->m_flags |= F2FS_MAP_MAPPED;

		map->m_pblk = blkaddr;
		map->m_len = 1;
	} else if ((map->m_pblk != NEW_ADDR &&
			blkaddr == (map->m_pblk + ofs)) ||
			(map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
			flag == F2FS_GET_BLOCK_PRE_DIO) {
		ofs++;
		map->m_len++;
	} else {
		goto sync_out;
	}

skip:
	dn.ofs_in_node++;
	pgofs++;

	/* preallocate blocks in batch for one dnode page */
	if (flag == F2FS_GET_BLOCK_PRE_AIO &&
			(pgofs == end || dn.ofs_in_node == end_offset)) {

		dn.ofs_in_node = ofs_in_node;
		err = f2fs_reserve_new_blocks(&dn, prealloc);
		if (err)
			goto sync_out;

		map->m_len += dn.ofs_in_node - ofs_in_node;
		if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
			err = -ENOSPC;
			goto sync_out;
		}
		dn.ofs_in_node = end_offset;
	}

	if (pgofs >= end)
		goto sync_out;
	else if (dn.ofs_in_node < end_offset)
		goto next_block;

	if (flag == F2FS_GET_BLOCK_PRECACHE) {
		if (map->m_flags & F2FS_MAP_MAPPED) {
			unsigned int ofs = start_pgofs - map->m_lblk;

			f2fs_update_extent_cache_range(&dn,
				start_pgofs, map->m_pblk + ofs,
				map->m_len - ofs);
		}
	}

	f2fs_put_dnode(&dn);

	if (map->m_may_create) {
		__do_map_lock(sbi, flag, false);
		f2fs_balance_fs(sbi, dn.node_changed);
	}
	goto next_dnode;

sync_out:

	/* for hardware encryption, but to avoid potential issue in future */
	if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED)
		f2fs_wait_on_block_writeback_range(inode,
						map->m_pblk, map->m_len);

	if (flag == F2FS_GET_BLOCK_PRECACHE) {
		if (map->m_flags & F2FS_MAP_MAPPED) {
			unsigned int ofs = start_pgofs - map->m_lblk;

			f2fs_update_extent_cache_range(&dn,
				start_pgofs, map->m_pblk + ofs,
				map->m_len - ofs);
		}
		if (map->m_next_extent)
			*map->m_next_extent = pgofs + 1;
	}
	f2fs_put_dnode(&dn);
unlock_out:
	if (map->m_may_create) {
		__do_map_lock(sbi, flag, false);
		f2fs_balance_fs(sbi, dn.node_changed);
	}
out:
	trace_f2fs_map_blocks(inode, map, err);
	return err;
}

bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len)
{
	struct f2fs_map_blocks map;
	block_t last_lblk;
	int err;

	if (pos + len > i_size_read(inode))
		return false;

	map.m_lblk = F2FS_BYTES_TO_BLK(pos);
	map.m_next_pgofs = NULL;
	map.m_next_extent = NULL;
	map.m_seg_type = NO_CHECK_TYPE;
	map.m_may_create = false;
	last_lblk = F2FS_BLK_ALIGN(pos + len);

	while (map.m_lblk < last_lblk) {
		map.m_len = last_lblk - map.m_lblk;
		err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT);
		if (err || map.m_len == 0)
			return false;
		map.m_lblk += map.m_len;
	}
	return true;
}

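/*
 * Thin adapters between the buffer_head convention used by bmap/direct-IO
 * callers and f2fs_map_blocks().
 */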
static int __get_data_block(struct inode *inode, sector_t iblock,
			struct buffer_head *bh, int create, int flag,
			pgoff_t *next_pgofs, int seg_type, bool may_write)
{
	struct f2fs_map_blocks map;
	int err;

	map.m_lblk = iblock;
	map.m_len = bh->b_size >> inode->i_blkbits;
	map.m_next_pgofs = next_pgofs;
	map.m_next_extent = NULL;
	map.m_seg_type = seg_type;
	map.m_may_create = may_write;

	err = f2fs_map_blocks(inode, &map, create, flag);
	if (!err) {
		map_bh(bh, inode->i_sb, map.m_pblk);
		bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
		bh->b_size = (u64)map.m_len << inode->i_blkbits;
	}
	return err;
}

static int get_data_block(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create, int flag,
			pgoff_t *next_pgofs)
{
	return __get_data_block(inode, iblock, bh_result, create,
							flag, next_pgofs,
							NO_CHECK_TYPE, create);
}

static int get_data_block_dio_write(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create)
{
	return __get_data_block(inode, iblock, bh_result, create,
				F2FS_GET_BLOCK_DIO, NULL,
				f2fs_rw_hint_to_seg_type(inode->i_write_hint),
				IS_SWAPFILE(inode) ? false : true);
}

static int get_data_block_dio(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create)
{
	return __get_data_block(inode, iblock, bh_result, create,
				F2FS_GET_BLOCK_DIO, NULL,
				f2fs_rw_hint_to_seg_type(inode->i_write_hint),
				false);
}

static int get_data_block_bmap(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create)
{
	/* Block number less than F2FS MAX BLOCKS */
	if (unlikely(iblock >= F2FS_I_SB(inode)->max_file_blocks))
		return -EFBIG;

	return __get_data_block(inode, iblock, bh_result, create,
						F2FS_GET_BLOCK_BMAP, NULL,
						NO_CHECK_TYPE, create);
}

static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
{
	return (offset >> inode->i_blkbits);
}

static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
{
	return (blk << inode->i_blkbits);
}

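/*
 * FIEMAP_FLAG_XATTR support: report the inline xattr area and/or the
 * separate xattr node as synthetic extents derived from node addresses.
 */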
static int f2fs_xattr_fiemap(struct inode *inode,
				struct fiemap_extent_info *fieinfo)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct page *page;
	struct node_info ni;
	__u64 phys = 0, len;
	__u32 flags;
	nid_t xnid = F2FS_I(inode)->i_xattr_nid;
	int err = 0;

	if (f2fs_has_inline_xattr(inode)) {
		int offset;

		page = f2fs_grab_cache_page(NODE_MAPPING(sbi),
						inode->i_ino, false);
		if (!page)
			return -ENOMEM;

		err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
		if (err) {
			f2fs_put_page(page, 1);
			return err;
		}

		phys = (__u64)blk_to_logical(inode, ni.blk_addr);
		offset = offsetof(struct f2fs_inode, i_addr) +
					sizeof(__le32) * (DEF_ADDRS_PER_INODE -
					get_inline_xattr_addrs(inode));

		phys += offset;
		len = inline_xattr_size(inode);

		f2fs_put_page(page, 1);

		flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED;

		if (!xnid)
			flags |= FIEMAP_EXTENT_LAST;

		err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
		if (err || err == 1)
			return err;
	}

	if (xnid) {
		page = f2fs_grab_cache_page(NODE_MAPPING(sbi), xnid, false);
		if (!page)
			return -ENOMEM;

		err = f2fs_get_node_info(sbi, xnid, &ni);
		if (err) {
			f2fs_put_page(page, 1);
			return err;
		}

		phys = (__u64)blk_to_logical(inode, ni.blk_addr);
		len = inode->i_sb->s_blocksize;

		f2fs_put_page(page, 1);

		flags = FIEMAP_EXTENT_LAST;
	}

	if (phys)
		err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);

	return (err < 0 ? err : 0);
}

int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		u64 start, u64 len)
{
	struct buffer_head map_bh;
	sector_t start_blk, last_blk;
	pgoff_t next_pgofs;
	u64 logical = 0, phys = 0, size = 0;
	u32 flags = 0;
	int ret = 0;

	if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
		ret = f2fs_precache_extents(inode);
		if (ret)
			return ret;
	}

	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR);
	if (ret)
		return ret;

	inode_lock(inode);

	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
		ret = f2fs_xattr_fiemap(inode, fieinfo);
		goto out;
	}

	if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) {
		ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
		if (ret != -EAGAIN)
			goto out;
	}

	if (logical_to_blk(inode, len) == 0)
		len = blk_to_logical(inode, 1);

	start_blk = logical_to_blk(inode, start);
	last_blk = logical_to_blk(inode, start + len - 1);

next:
	memset(&map_bh, 0, sizeof(struct buffer_head));
	map_bh.b_size = len;

	ret = get_data_block(inode, start_blk, &map_bh, 0,
					F2FS_GET_BLOCK_FIEMAP, &next_pgofs);
	if (ret)
		goto out;

	/* HOLE */
	if (!buffer_mapped(&map_bh)) {
		start_blk = next_pgofs;

		if (blk_to_logical(inode, start_blk) < blk_to_logical(inode,
					F2FS_I_SB(inode)->max_file_blocks))
			goto prep_next;

		flags |= FIEMAP_EXTENT_LAST;
	}

	if (size) {
		if (IS_ENCRYPTED(inode))
			flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;

		ret = fiemap_fill_next_extent(fieinfo, logical,
				phys, size, flags);
	}

	if (start_blk > last_blk || ret)
		goto out;

	logical = blk_to_logical(inode, start_blk);
	phys = blk_to_logical(inode, map_bh.b_blocknr);
	size = map_bh.b_size;
	flags = 0;
	if (buffer_unwritten(&map_bh))
		flags = FIEMAP_EXTENT_UNWRITTEN;

	start_blk += logical_to_blk(inode, size);

prep_next:
	cond_resched();
	if (fatal_signal_pending(current))
		ret = -EINTR;
	else
		goto next;
out:
	if (ret == 1)
		ret = 0;

	inode_unlock(inode);
	return ret;
}

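/*
 * Read one page for ->readpage/->readpages: reuse the caller's cached
 * f2fs_map_blocks() result when the page falls inside it, and merge the
 * page into the shared bio while block numbers stay contiguous.
 */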
static int f2fs_read_single_page(struct inode *inode, struct page *page,
					unsigned nr_pages,
					struct f2fs_map_blocks *map,
					struct bio **bio_ret,
					sector_t *last_block_in_bio,
					bool is_readahead)
{
	struct bio *bio = *bio_ret;
	const unsigned blkbits = inode->i_blkbits;
	const unsigned blocksize = 1 << blkbits;
	sector_t block_in_file;
	sector_t last_block;
	sector_t last_block_in_file;
	sector_t block_nr;
	int ret = 0;

	block_in_file = (sector_t)page_index(page);
	last_block = block_in_file + nr_pages;
	last_block_in_file = (i_size_read(inode) + blocksize - 1) >>
							blkbits;
	if (last_block > last_block_in_file)
		last_block = last_block_in_file;

	/* just zeroing out page which is beyond EOF */
	if (block_in_file >= last_block)
		goto zero_out;
	/*
	 * Map blocks using the previous result first.
	 */
	if ((map->m_flags & F2FS_MAP_MAPPED) &&
			block_in_file > map->m_lblk &&
			block_in_file < (map->m_lblk + map->m_len))
		goto got_it;

	/*
	 * Then do more f2fs_map_blocks() calls until we are
	 * done with this page.
	 */
	map->m_lblk = block_in_file;
	map->m_len = last_block - block_in_file;

	ret = f2fs_map_blocks(inode, map, 0, F2FS_GET_BLOCK_DEFAULT);
	if (ret)
		goto out;
got_it:
	if ((map->m_flags & F2FS_MAP_MAPPED)) {
		block_nr = map->m_pblk + block_in_file - map->m_lblk;
		SetPageMappedToDisk(page);

		if (!PageUptodate(page) && (!PageSwapCache(page) &&
					!cleancache_get_page(page))) {
			SetPageUptodate(page);
			goto confused;
		}

		if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
						DATA_GENERIC_ENHANCE_READ)) {
			ret = -EFSCORRUPTED;
			goto out;
		}
	} else {
zero_out:
		zero_user_segment(page, 0, PAGE_SIZE);
		if (!PageUptodate(page))
			SetPageUptodate(page);
		unlock_page(page);
		goto out;
	}

	/*
	 * This page will go to BIO.  Do we need to send this
	 * BIO off first?
	 */
	if (bio && !page_is_mergeable(F2FS_I_SB(inode), bio,
				*last_block_in_bio, block_nr)) {
submit_and_realloc:
		__submit_bio(F2FS_I_SB(inode), bio, DATA);
		bio = NULL;
	}
	if (bio == NULL) {
		bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
				is_readahead ? REQ_RAHEAD : 0);
		if (IS_ERR(bio)) {
			ret = PTR_ERR(bio);
			bio = NULL;
			goto out;
		}
	}

	/*
	 * If the page is under writeback, we need to wait for
	 * its completion to see the correct decrypted data.
	 */
	f2fs_wait_on_block_writeback(inode, block_nr);

	if (bio_add_page(bio, page, blocksize, 0) < blocksize)
		goto submit_and_realloc;

	inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
	ClearPageError(page);
	*last_block_in_bio = block_nr;
	goto out;
confused:
	if (bio) {
		__submit_bio(F2FS_I_SB(inode), bio, DATA);
		bio = NULL;
	}
	unlock_page(page);
out:
	*bio_ret = bio;
	return ret;
}

/*
 * This function was originally taken from fs/mpage.c, and customized for f2fs.
 * Major change was from block_size == page_size in f2fs by default.
 *
 * Note that the aops->readpages() function is ONLY used for read-ahead. If
 * this function ever deviates from doing just read-ahead, it should either
 * use ->readpage() or do the necessary surgery to decouple ->readpages()
 * from read-ahead.
 */
static int f2fs_mpage_readpages(struct address_space *mapping,
			struct list_head *pages, struct page *page,
			unsigned nr_pages, bool is_readahead)
{
	struct bio *bio = NULL;
	sector_t last_block_in_bio = 0;
	struct inode *inode = mapping->host;
	struct f2fs_map_blocks map;
	int ret = 0;

	map.m_pblk = 0;
	map.m_lblk = 0;
	map.m_len = 0;
	map.m_flags = 0;
	map.m_next_pgofs = NULL;
	map.m_next_extent = NULL;
	map.m_seg_type = NO_CHECK_TYPE;
	map.m_may_create = false;

	for (; nr_pages; nr_pages--) {
		if (pages) {
			page = list_last_entry(pages, struct page, lru);

			prefetchw(&page->flags);
			list_del(&page->lru);
			if (add_to_page_cache_lru(page, mapping,
						  page_index(page),
						  readahead_gfp_mask(mapping)))
				goto next_page;
		}

		ret = f2fs_read_single_page(inode, page, nr_pages, &map, &bio,
					&last_block_in_bio, is_readahead);
		if (ret) {
			SetPageError(page);
			zero_user_segment(page, 0, PAGE_SIZE);
			unlock_page(page);
		}
next_page:
		if (pages)
			put_page(page);
	}
	BUG_ON(pages && !list_empty(pages));
	if (bio)
		__submit_bio(F2FS_I_SB(inode), bio, DATA);
	return pages ? 0 : ret;
}

static int f2fs_read_data_page(struct file *file, struct page *page)
{
	struct inode *inode = page_file_mapping(page)->host;
	int ret = -EAGAIN;

	trace_f2fs_readpage(page, DATA);

	/* If the file has inline data, try to read it directly */
	if (f2fs_has_inline_data(inode))
		ret = f2fs_read_inline_data(inode, page);
	if (ret == -EAGAIN)
		ret = f2fs_mpage_readpages(page_file_mapping(page),
						NULL, page, 1, false);
	return ret;
}

static int f2fs_read_data_pages(struct file *file,
			struct address_space *mapping,
			struct list_head *pages, unsigned nr_pages)
{
	struct inode *inode = mapping->host;
	struct page *page = list_last_entry(pages, struct page, lru);

	trace_f2fs_readpages(inode, page, nr_pages);

	/* If the file has inline data, skip readpages */
	if (f2fs_has_inline_data(inode))
		return 0;

	return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages, true);
}

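/*
 * Encrypt a data page into a bounce page before write-out; if GC keeps an
 * encrypted copy of the old block in META_MAPPING, refresh it so later
 * reads of that address see the new ciphertext.
 */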
static int encrypt_one_page(struct f2fs_io_info *fio)
{
	struct inode *inode = fio->page->mapping->host;
	struct page *mpage;
	gfp_t gfp_flags = GFP_NOFS;

	if (!f2fs_encrypted_file(inode))
		return 0;

	/* wait for GCed page writeback via META_MAPPING */
	f2fs_wait_on_block_writeback(inode, fio->old_blkaddr);

retry_encrypt:
	fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(fio->page,
							       PAGE_SIZE, 0,
							       gfp_flags);
	if (IS_ERR(fio->encrypted_page)) {
		/* flush pending IOs and wait for a while in the ENOMEM case */
		if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
			f2fs_flush_merged_writes(fio->sbi);
			congestion_wait(BLK_RW_ASYNC, HZ/50);
			gfp_flags |= __GFP_NOFAIL;
			goto retry_encrypt;
		}
		return PTR_ERR(fio->encrypted_page);
	}

	mpage = find_lock_page(META_MAPPING(fio->sbi), fio->old_blkaddr);
	if (mpage) {
		if (PageUptodate(mpage))
			memcpy(page_address(mpage),
				page_address(fio->encrypted_page), PAGE_SIZE);
		f2fs_put_page(mpage, 1);
	}
	return 0;
}

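/*
 * In-place-update policy: any of the F2FS_IPU_* mode bits (force, SSR,
 * utilization thresholds, async rewrite, fdatasync) or the
 * checkpoint-disabled case can permit rewriting a block in place.
 */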
static inline bool check_inplace_update_policy(struct inode *inode,
				struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	unsigned int policy = SM_I(sbi)->ipu_policy;

	if (policy & (0x1 << F2FS_IPU_FORCE))
		return true;
	if (policy & (0x1 << F2FS_IPU_SSR) && f2fs_need_SSR(sbi))
		return true;
	if (policy & (0x1 << F2FS_IPU_UTIL) &&
			utilization(sbi) > SM_I(sbi)->min_ipu_util)
		return true;
	if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && f2fs_need_SSR(sbi) &&
			utilization(sbi) > SM_I(sbi)->min_ipu_util)
		return true;

	/*
	 * IPU for rewrite async pages
	 */
	if (policy & (0x1 << F2FS_IPU_ASYNC) &&
			fio && fio->op == REQ_OP_WRITE &&
			!(fio->op_flags & REQ_SYNC) &&
			!IS_ENCRYPTED(inode))
		return true;

	/* this is only set during fdatasync */
	if (policy & (0x1 << F2FS_IPU_FSYNC) &&
			is_inode_flag_set(inode, FI_NEED_IPU))
		return true;

	if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
			!f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
		return true;

	return false;
}
bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio)
{
	if (f2fs_is_pinned_file(inode))
		return true;

	/* if this is cold file, we should overwrite to avoid fragmentation */
	if (file_is_cold(inode))
		return true;

	return check_inplace_update_policy(inode, fio);
}

bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	if (test_opt(sbi, LFS))
		return true;
	if (S_ISDIR(inode->i_mode))
		return true;
	if (IS_NOQUOTA(inode))
		return true;
	if (f2fs_is_atomic_file(inode))
		return true;
	if (fio) {
		if (is_cold_data(fio->page))
			return true;
		if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
			return true;
		if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
			f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
			return true;
	}
	return false;
}

static inline bool need_inplace_update(struct f2fs_io_info *fio)
{
	struct inode *inode = fio->page->mapping->host;

	if (f2fs_should_update_outplace(inode, fio))
		return false;

	return f2fs_should_update_inplace(inode, fio);
}

int f2fs_do_write_data_page(struct f2fs_io_info *fio)
{
	struct page *page = fio->page;
	struct inode *inode = page->mapping->host;
	struct dnode_of_data dn;
	struct extent_info ei = {0,0,0};
	struct node_info ni;
	bool ipu_force = false;
	int err = 0;

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	if (need_inplace_update(fio) &&
			f2fs_lookup_extent_cache(inode, page->index, &ei)) {
		fio->old_blkaddr = ei.blk + page->index - ei.fofs;

		if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
						DATA_GENERIC_ENHANCE))
			return -EFSCORRUPTED;

		ipu_force = true;
		fio->need_lock = LOCK_DONE;
		goto got_it;
	}

	/* avoid a deadlock between page->lock and f2fs_lock_op */
	if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi))
		return -EAGAIN;

	err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
	if (err)
		goto out;

	fio->old_blkaddr = dn.data_blkaddr;

	/* This page is already truncated */
	if (fio->old_blkaddr == NULL_ADDR) {
		ClearPageUptodate(page);
		clear_cold_data(page);
		goto out_writepage;
	}
got_it:
	if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
		!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
						DATA_GENERIC_ENHANCE)) {
		err = -EFSCORRUPTED;
		goto out_writepage;
	}
	/*
	 * If the current allocation needs SSR,
	 * updated data had better be written in place.
	 */
	if (ipu_force ||
		(__is_valid_data_blkaddr(fio->old_blkaddr) &&
					need_inplace_update(fio))) {
		err = encrypt_one_page(fio);
		if (err)
			goto out_writepage;

		set_page_writeback(page);
		ClearPageError(page);
		f2fs_put_dnode(&dn);
		if (fio->need_lock == LOCK_REQ)
			f2fs_unlock_op(fio->sbi);
		err = f2fs_inplace_write_data(fio);
		if (err) {
			if (f2fs_encrypted_file(inode))
				fscrypt_finalize_bounce_page(&fio->encrypted_page);
			if (PageWriteback(page))
				end_page_writeback(page);
		} else {
			set_inode_flag(inode, FI_UPDATE_WRITE);
		}
		trace_f2fs_do_write_data_page(fio->page, IPU);
		return err;
	}

	if (fio->need_lock == LOCK_RETRY) {
		if (!f2fs_trylock_op(fio->sbi)) {
			err = -EAGAIN;
			goto out_writepage;
		}
		fio->need_lock = LOCK_REQ;
	}

	err = f2fs_get_node_info(fio->sbi, dn.nid, &ni);
	if (err)
		goto out_writepage;

	fio->version = ni.version;

	err = encrypt_one_page(fio);
	if (err)
		goto out_writepage;

	set_page_writeback(page);
	ClearPageError(page);

	/* LFS mode write path */
	f2fs_outplace_write_data(&dn, fio);
	trace_f2fs_do_write_data_page(page, OPU);
	set_inode_flag(inode, FI_APPEND_WRITE);
	if (page->index == 0)
		set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
out_writepage:
	f2fs_put_dnode(&dn);
out:
	if (fio->need_lock == LOCK_REQ)
		f2fs_unlock_op(fio->sbi);
	return err;
}

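/*
 * Write one data page.  Returns 0 once the page has been handled
 * (written out or dropped), AOP_WRITEPAGE_ACTIVATE when it was
 * redirtied for a later retry, and a negative errno otherwise;
 * *submitted, when non-NULL, reports whether a bio was issued.
 */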
static int __write_data_page(struct page *page, bool *submitted,
				struct bio **bio,
				sector_t *last_block,
				struct writeback_control *wbc,
				enum iostat_type io_type)
{
	struct inode *inode = page->mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	loff_t i_size = i_size_read(inode);
	const pgoff_t end_index = ((unsigned long long) i_size)
							>> PAGE_SHIFT;
	loff_t psize = (page->index + 1) << PAGE_SHIFT;
	unsigned offset = 0;
	bool need_balance_fs = false;
	int err = 0;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.ino = inode->i_ino,
		.type = DATA,
		.op = REQ_OP_WRITE,
		.op_flags = wbc_to_write_flags(wbc),
		.old_blkaddr = NULL_ADDR,
		.page = page,
		.encrypted_page = NULL,
		.submitted = false,
		.need_lock = LOCK_RETRY,
		.io_type = io_type,
		.io_wbc = wbc,
		.bio = bio,
		.last_block = last_block,
	};

	trace_f2fs_writepage(page, DATA);

	/* we should bypass data pages to let the kworker jobs proceed */
	if (unlikely(f2fs_cp_error(sbi))) {
		mapping_set_error(page->mapping, -EIO);
		/*
		 * don't drop any dirty dentry pages for keeping the latest
		 * directory structure.
		 */
		if (S_ISDIR(inode->i_mode))
			goto redirty_out;
		goto out;
	}

	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto redirty_out;

	if (page->index < end_index)
		goto write;

	/*
	 * If the offset is out-of-range of file size,
	 * this page does not have to be written to disk.
	 */
	offset = i_size & (PAGE_SIZE - 1);
	if ((page->index >= end_index + 1) || !offset)
		goto out;

	zero_user_segment(page, offset, PAGE_SIZE);
write:
	if (f2fs_is_drop_cache(inode))
		goto out;
	/* we should not write 0'th page having journal header */
	if (f2fs_is_volatile_file(inode) && (!page->index ||
			(!wbc->for_reclaim &&
			f2fs_available_free_memory(sbi, BASE_CHECK))))
		goto redirty_out;

	/* Dentry blocks are controlled by checkpoint */
	if (S_ISDIR(inode->i_mode)) {
		fio.need_lock = LOCK_DONE;
		err = f2fs_do_write_data_page(&fio);
		goto done;
	}

	if (!wbc->for_reclaim)
		need_balance_fs = true;
	else if (has_not_enough_free_secs(sbi, 0, 0))
		goto redirty_out;
	else
		set_inode_flag(inode, FI_HOT_DATA);

	err = -EAGAIN;
	if (f2fs_has_inline_data(inode)) {
		err = f2fs_write_inline_data(inode, page);
		if (!err)
			goto out;
	}

	if (err == -EAGAIN) {
		err = f2fs_do_write_data_page(&fio);
		if (err == -EAGAIN) {
			fio.need_lock = LOCK_REQ;
			err = f2fs_do_write_data_page(&fio);
		}
	}

	if (err) {
		file_set_keep_isize(inode);
	} else {
		down_write(&F2FS_I(inode)->i_sem);
		if (F2FS_I(inode)->last_disk_size < psize)
			F2FS_I(inode)->last_disk_size = psize;
		up_write(&F2FS_I(inode)->i_sem);
	}

done:
	if (err && err != -ENOENT)
		goto redirty_out;

out:
	inode_dec_dirty_pages(inode);
	if (err) {
		ClearPageUptodate(page);
		clear_cold_data(page);
	}

	if (wbc->for_reclaim) {
		f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA);
		clear_inode_flag(inode, FI_HOT_DATA);
		f2fs_remove_dirty_inode(inode);
		submitted = NULL;
	}

	unlock_page(page);
	if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) &&
					!F2FS_I(inode)->cp_task) {
		f2fs_submit_ipu_bio(sbi, bio, page);
		f2fs_balance_fs(sbi, need_balance_fs);
	}

	if (unlikely(f2fs_cp_error(sbi))) {
		f2fs_submit_ipu_bio(sbi, bio, page);
		f2fs_submit_merged_write(sbi, DATA);
		submitted = NULL;
	}

	if (submitted)
		*submitted = fio.submitted;

	return 0;

redirty_out:
	redirty_page_for_writepage(wbc, page);
	/*
	 * pageout() in MM translates EAGAIN, so calls handle_write_error()
	 * -> mapping_set_error() -> set_bit(AS_EIO, ...).
	 * file_write_and_wait_range() will see EIO error, which is critical
	 * to return value of fsync() followed by atomic_write failure to user.
	 */
	if (!err || wbc->for_reclaim)
		return AOP_WRITEPAGE_ACTIVATE;
	unlock_page(page);
	return err;
}

static int f2fs_write_data_page(struct page *page,
					struct writeback_control *wbc)
{
	return __write_data_page(page, NULL, NULL, NULL, wbc, FS_DATA_IO);
}

/*
 * This function was copied from write_cache_pages in mm/page-writeback.c.
 * The major change is making the write step of cold data pages separate
 * from warm/hot data pages.
 */
static int f2fs_write_cache_pages(struct address_space *mapping,
					struct writeback_control *wbc,
					enum iostat_type io_type)
{
	int ret = 0;
	int done = 0;
	struct pagevec pvec;
	struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
	struct bio *bio = NULL;
	sector_t last_block;
	int nr_pages;
	pgoff_t uninitialized_var(writeback_index);
	pgoff_t index;
	pgoff_t end;		/* Inclusive */
	pgoff_t done_index;
	int cycled;
	int range_whole = 0;
	xa_mark_t tag;
	int nwritten = 0;

	pagevec_init(&pvec);

	if (get_dirty_pages(mapping->host) <=
				SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
		set_inode_flag(mapping->host, FI_HOT_DATA);
	else
		clear_inode_flag(mapping->host, FI_HOT_DATA);

	if (wbc->range_cyclic) {
		writeback_index = mapping->writeback_index; /* prev offset */
		index = writeback_index;
		if (index == 0)
			cycled = 1;
		else
			cycled = 0;
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_SHIFT;
		end = wbc->range_end >> PAGE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
		cycled = 1; /* ignore range_cyclic tests */
	}
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag = PAGECACHE_TAG_TOWRITE;
	else
		tag = PAGECACHE_TAG_DIRTY;
retry:
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag_pages_for_writeback(mapping, index, end);
	done_index = index;
	while (!done && (index <= end)) {
		int i;

		nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
				tag);
		if (nr_pages == 0)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];
			bool submitted = false;

			/* give a priority to WB_SYNC threads */
			if (atomic_read(&sbi->wb_sync_req[DATA]) &&
					wbc->sync_mode == WB_SYNC_NONE) {
				done = 1;
				break;
			}

			done_index = page->index;
retry_write:
			lock_page(page);

			if (unlikely(page->mapping != mapping)) {
continue_unlock:
				unlock_page(page);
				continue;
			}

			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			if (PageWriteback(page)) {
				if (wbc->sync_mode != WB_SYNC_NONE) {
					f2fs_wait_on_page_writeback(page,
							DATA, true, true);
					f2fs_submit_ipu_bio(sbi, &bio, page);
				} else {
					goto continue_unlock;
				}
			}

			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			ret = __write_data_page(page, &submitted, &bio,
					&last_block, wbc, io_type);
			if (unlikely(ret)) {
				/*
				 * keep nr_to_write, since vfs uses this to
				 * get # of written pages.
				 */
				if (ret == AOP_WRITEPAGE_ACTIVATE) {
					unlock_page(page);
					ret = 0;
					continue;
				} else if (ret == -EAGAIN) {
					ret = 0;
					if (wbc->sync_mode == WB_SYNC_ALL) {
						cond_resched();
						congestion_wait(BLK_RW_ASYNC,
									HZ/50);
						goto retry_write;
					}
					continue;
				}
				done_index = page->index + 1;
				done = 1;
				break;
			} else if (submitted) {
				nwritten++;
			}

			if (--wbc->nr_to_write <= 0 &&
					wbc->sync_mode == WB_SYNC_NONE) {
				done = 1;
				break;
			}
		}
		pagevec_release(&pvec);
		cond_resched();
	}

	if (!cycled && !done) {
		cycled = 1;
		index = 0;
		end = writeback_index - 1;
		goto retry;
	}
	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = done_index;

	if (nwritten)
		f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host,
								NULL, 0, DATA);
	/* submit cached bio of IPU write */
	if (bio)
		__submit_bio(sbi, bio, DATA);

	return ret;
}

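/*
 * When this returns true, __f2fs_write_data_pages() takes
 * sbi->writepages so that only one task streams this kind of writeback
 * at a time, keeping large or non-sync write requests from
 * interleaving with each other.
 */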
static inline bool __should_serialize_io(struct inode *inode,
					struct writeback_control *wbc)
{
	if (!S_ISREG(inode->i_mode))
		return false;
	if (IS_NOQUOTA(inode))
		return false;
	/* to avoid deadlock in path of data flush */
	if (F2FS_I(inode)->cp_task)
		return false;
	if (wbc->sync_mode != WB_SYNC_ALL)
		return true;
	if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
		return true;
	return false;
}

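/*
 * sbi->wb_sync_req[DATA] counts in-flight WB_SYNC_ALL writebacks;
 * while it is non-zero, WB_SYNC_NONE callers skip writing so that
 * background writeback does not split up the synchronous I/O stream.
 */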
static int __f2fs_write_data_pages(struct address_space *mapping,
						struct writeback_control *wbc,
						enum iostat_type io_type)
{
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct blk_plug plug;
	int ret;
	bool locked = false;

	/* deal with chardevs and other special file */
	if (!mapping->a_ops->writepage)
		return 0;

	/* skip writing if there is no dirty page in this inode */
	if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE)
		return 0;

	/* during POR, we don't need to trigger writepage at all. */
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto skip_write;

	if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) &&
			wbc->sync_mode == WB_SYNC_NONE &&
			get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
			f2fs_available_free_memory(sbi, DIRTY_DENTS))
		goto skip_write;

	/* skip writing during file defragment */
	if (is_inode_flag_set(inode, FI_DO_DEFRAG))
		goto skip_write;

	trace_f2fs_writepages(mapping->host, wbc, DATA);

	/* to avoid splitting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
	if (wbc->sync_mode == WB_SYNC_ALL)
		atomic_inc(&sbi->wb_sync_req[DATA]);
	else if (atomic_read(&sbi->wb_sync_req[DATA]))
		goto skip_write;

	if (__should_serialize_io(inode, wbc)) {
		mutex_lock(&sbi->writepages);
		locked = true;
	}

	blk_start_plug(&plug);
	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
	blk_finish_plug(&plug);

	if (locked)
		mutex_unlock(&sbi->writepages);

	if (wbc->sync_mode == WB_SYNC_ALL)
		atomic_dec(&sbi->wb_sync_req[DATA]);
	/*
	 * if some pages were truncated, we cannot guarantee that
	 * mapping->host can detect its pending bios.
	 */

	f2fs_remove_dirty_inode(inode);
	return ret;

skip_write:
	wbc->pages_skipped += get_dirty_pages(inode);
	trace_f2fs_writepages(mapping->host, wbc, DATA);
	return 0;
}

static int f2fs_write_data_pages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;

	return __f2fs_write_data_pages(mapping, wbc,
			F2FS_I(inode)->cp_task == current ?
			FS_CP_DATA_IO : FS_DATA_IO);
}

static void f2fs_write_failed(struct address_space *mapping, loff_t to)
{
	struct inode *inode = mapping->host;
	loff_t i_size = i_size_read(inode);

	if (to > i_size) {
		down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
		down_write(&F2FS_I(inode)->i_mmap_sem);

		truncate_pagecache(inode, i_size);
		if (!IS_NOQUOTA(inode))
			f2fs_truncate_blocks(inode, i_size, true);

		up_write(&F2FS_I(inode)->i_mmap_sem);
		up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	}
}

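/*
 * prepare_write_begin() resolves the on-disk block backing the page
 * being written: inline data is read or converted as needed, a new
 * block may be reserved, and *node_changed reports whether the node
 * page was dirtied so that the caller can rebalance free sections.
 */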
static int prepare_write_begin(struct f2fs_sb_info *sbi,
			struct page *page, loff_t pos, unsigned len,
			block_t *blk_addr, bool *node_changed)
{
	struct inode *inode = page->mapping->host;
	pgoff_t index = page->index;
	struct dnode_of_data dn;
	struct page *ipage;
	bool locked = false;
	struct extent_info ei = {0,0,0};
	int err = 0;
	int flag;

	/*
	 * we already allocated all the blocks, so we don't need to get
	 * the block addresses when there is no need to fill the page.
	 */
	if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE &&
			!is_inode_flag_set(inode, FI_NO_PREALLOC))
		return 0;

	/* f2fs_lock_op avoids race between write CP and convert_inline_page */
	if (f2fs_has_inline_data(inode) && pos + len > MAX_INLINE_DATA(inode))
		flag = F2FS_GET_BLOCK_DEFAULT;
	else
		flag = F2FS_GET_BLOCK_PRE_AIO;

	if (f2fs_has_inline_data(inode) ||
			(pos & PAGE_MASK) >= i_size_read(inode)) {
		__do_map_lock(sbi, flag, true);
		locked = true;
	}
restart:
	/* check inline_data */
	ipage = f2fs_get_node_page(sbi, inode->i_ino);
	if (IS_ERR(ipage)) {
		err = PTR_ERR(ipage);
		goto unlock_out;
	}

	set_new_dnode(&dn, inode, ipage, ipage, 0);

	if (f2fs_has_inline_data(inode)) {
		if (pos + len <= MAX_INLINE_DATA(inode)) {
			f2fs_do_read_inline_data(page, ipage);
			set_inode_flag(inode, FI_DATA_EXIST);
			if (inode->i_nlink)
				set_inline_node(ipage);
		} else {
			err = f2fs_convert_inline_page(&dn, page);
			if (err)
				goto out;
			if (dn.data_blkaddr == NULL_ADDR)
				err = f2fs_get_block(&dn, index);
		}
	} else if (locked) {
		err = f2fs_get_block(&dn, index);
	} else {
		if (f2fs_lookup_extent_cache(inode, index, &ei)) {
			dn.data_blkaddr = ei.blk + index - ei.fofs;
		} else {
			/* hole case */
			err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
			if (err || dn.data_blkaddr == NULL_ADDR) {
				f2fs_put_dnode(&dn);
				__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
								true);
				WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO);
				locked = true;
				goto restart;
			}
		}
	}

	/* convert_inline_page can make node_changed */
	*blk_addr = dn.data_blkaddr;
	*node_changed = dn.node_changed;
out:
	f2fs_put_dnode(&dn);
unlock_out:
	if (locked)
		__do_map_lock(sbi, flag, false);
	return err;
}

static int f2fs_write_begin(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned flags,
		struct page **pagep, void **fsdata)
{
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct page *page = NULL;
	pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT;
	bool need_balance = false, drop_atomic = false;
	block_t blkaddr = NULL_ADDR;
	int err = 0;

	trace_f2fs_write_begin(inode, pos, len, flags);

	if (!f2fs_is_checkpoint_ready(sbi)) {
		err = -ENOSPC;
		goto fail;
	}

	if ((f2fs_is_atomic_file(inode) &&
			!f2fs_available_free_memory(sbi, INMEM_PAGES)) ||
			is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) {
		err = -ENOMEM;
		drop_atomic = true;
		goto fail;
	}

	/*
	 * We should check this at this moment to avoid deadlock on inode page
	 * and #0 page. The locking rule for inline_data conversion should be:
	 * lock_page(page #0) -> lock_page(inode_page)
	 */
	if (index != 0) {
		err = f2fs_convert_inline_inode(inode);
		if (err)
			goto fail;
	}
repeat:
	/*
	 * Do not use grab_cache_page_write_begin() to avoid deadlock due to
	 * wait_for_stable_page. We will wait for that below with our IO
	 * control.
	 */
	page = f2fs_pagecache_get_page(mapping, index,
				FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS);
	if (!page) {
		err = -ENOMEM;
		goto fail;
	}

	*pagep = page;

	err = prepare_write_begin(sbi, page, pos, len,
					&blkaddr, &need_balance);
	if (err)
		goto fail;

	if (need_balance && !IS_NOQUOTA(inode) &&
			has_not_enough_free_secs(sbi, 0, 0)) {
		unlock_page(page);
		f2fs_balance_fs(sbi, true);
		lock_page(page);
		if (page->mapping != mapping) {
			/* The page got truncated from under us */
			f2fs_put_page(page, 1);
			goto repeat;
		}
	}

	f2fs_wait_on_page_writeback(page, DATA, false, true);

	if (len == PAGE_SIZE || PageUptodate(page))
		return 0;

	if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode)) {
		zero_user_segment(page, len, PAGE_SIZE);
		return 0;
	}

	if (blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_SIZE);
		SetPageUptodate(page);
	} else {
		if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
				DATA_GENERIC_ENHANCE_READ)) {
			err = -EFSCORRUPTED;
			goto fail;
		}
		err = f2fs_submit_page_read(inode, page, blkaddr);
		if (err)
			goto fail;

		lock_page(page);
		if (unlikely(page->mapping != mapping)) {
			f2fs_put_page(page, 1);
			goto repeat;
		}
		if (unlikely(!PageUptodate(page))) {
			err = -EIO;
			goto fail;
		}
	}
	return 0;

fail:
	f2fs_put_page(page, 1);
	f2fs_write_failed(mapping, pos + len);
	if (drop_atomic)
		f2fs_drop_inmem_pages_all(sbi, false);
	return err;
}

static int f2fs_write_end(struct file *file,
			struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	struct inode *inode = page->mapping->host;

	trace_f2fs_write_end(inode, pos, len, copied);

	/*
	 * This should come from len == PAGE_SIZE, and we expect copied
	 * to be PAGE_SIZE. Otherwise, we treat it as zero copied and
	 * let generic_perform_write() try to copy data again through copied=0.
	 */
	if (!PageUptodate(page)) {
		if (unlikely(copied != len))
			copied = 0;
		else
			SetPageUptodate(page);
	}
	if (!copied)
		goto unlock_out;

	set_page_dirty(page);

	if (pos + copied > i_size_read(inode))
		f2fs_i_size_write(inode, pos + copied);
unlock_out:
	f2fs_put_page(page, 1);
	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
	return copied;
}

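/*
 * Returns 0 when the request is aligned to the filesystem block size,
 * a negative errno when it is not even aligned to the device's logical
 * block size, and 1 in between; for the latter, f2fs_direct_IO()
 * returns 0 so that the VFS falls back to buffered I/O.
 */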
static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
			   loff_t offset)
{
	unsigned i_blkbits = READ_ONCE(inode->i_blkbits);
	unsigned blkbits = i_blkbits;
	unsigned blocksize_mask = (1 << blkbits) - 1;
	unsigned long align = offset | iov_iter_alignment(iter);
	struct block_device *bdev = inode->i_sb->s_bdev;

	if (align & blocksize_mask) {
		if (bdev)
			blkbits = blksize_bits(bdev_logical_block_size(bdev));
		blocksize_mask = (1 << blkbits) - 1;
		if (align & blocksize_mask)
			return -EINVAL;
		return 1;
	}
	return 0;
}

static void f2fs_dio_end_io(struct bio *bio)
{
	struct f2fs_private_dio *dio = bio->bi_private;

	dec_page_count(F2FS_I_SB(dio->inode),
			dio->write ? F2FS_DIO_WRITE : F2FS_DIO_READ);

	bio->bi_private = dio->orig_private;
	bio->bi_end_io = dio->orig_end_io;

	kvfree(dio);

	bio_endio(bio);
}

static void f2fs_dio_submit_bio(struct bio *bio, struct inode *inode,
							loff_t file_offset)
{
	struct f2fs_private_dio *dio;
	bool write = (bio_op(bio) == REQ_OP_WRITE);

	dio = f2fs_kzalloc(F2FS_I_SB(inode),
			sizeof(struct f2fs_private_dio), GFP_NOFS);
	if (!dio)
		goto out;

	dio->inode = inode;
	dio->orig_end_io = bio->bi_end_io;
	dio->orig_private = bio->bi_private;
	dio->write = write;

	bio->bi_end_io = f2fs_dio_end_io;
	bio->bi_private = dio;

	inc_page_count(F2FS_I_SB(inode),
			write ? F2FS_DIO_WRITE : F2FS_DIO_READ);

	submit_bio(bio);
	return;
out:
	bio->bi_status = BLK_STS_IOERR;
	bio_endio(bio);
}

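/*
 * i_gc_rwsem is held in read mode across __blockdev_direct_IO() so
 * that GC cannot migrate the underlying blocks mid-flight; with
 * IOCB_NOWAIT, a failed trylock is surfaced as -EAGAIN instead of
 * sleeping.
 */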
static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
	struct address_space *mapping = iocb->ki_filp->f_mapping;
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	size_t count = iov_iter_count(iter);
	loff_t offset = iocb->ki_pos;
	int rw = iov_iter_rw(iter);
	int err;
	enum rw_hint hint = iocb->ki_hint;
	int whint_mode = F2FS_OPTION(sbi).whint_mode;
	bool do_opu;

	err = check_direct_IO(inode, iter, offset);
	if (err)
		return err < 0 ? err : 0;

	if (f2fs_force_buffered_io(inode, iocb, iter))
		return 0;

	do_opu = allow_outplace_dio(inode, iocb, iter);

	trace_f2fs_direct_IO_enter(inode, offset, count, rw);

	if (rw == WRITE && whint_mode == WHINT_MODE_OFF)
		iocb->ki_hint = WRITE_LIFE_NOT_SET;

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!down_read_trylock(&fi->i_gc_rwsem[rw])) {
			iocb->ki_hint = hint;
			err = -EAGAIN;
			goto out;
		}
		if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
			up_read(&fi->i_gc_rwsem[rw]);
			iocb->ki_hint = hint;
			err = -EAGAIN;
			goto out;
		}
	} else {
		down_read(&fi->i_gc_rwsem[rw]);
		if (do_opu)
			down_read(&fi->i_gc_rwsem[READ]);
	}

	err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
			iter, rw == WRITE ? get_data_block_dio_write :
			get_data_block_dio, NULL, f2fs_dio_submit_bio,
			DIO_LOCKING | DIO_SKIP_HOLES);

	if (do_opu)
		up_read(&fi->i_gc_rwsem[READ]);

	up_read(&fi->i_gc_rwsem[rw]);

	if (rw == WRITE) {
		if (whint_mode == WHINT_MODE_OFF)
			iocb->ki_hint = hint;
		if (err > 0) {
			f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
									err);
			if (!do_opu)
				set_inode_flag(inode, FI_UPDATE_WRITE);
		} else if (err < 0) {
			f2fs_write_failed(mapping, offset + count);
		}
	}

out:
	trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);

	return err;
}

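/*
 * For anything but the meta/node inodes, a partial-page invalidation
 * (non-zero offset or short length) is ignored here; a full-page
 * invalidation drops the matching dirty-page accounting and hands
 * atomic-written pages to f2fs_drop_inmem_page() before the page
 * private data is cleared.
 */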
void f2fs_invalidate_page(struct page *page, unsigned int offset,
							unsigned int length)
{
	struct inode *inode = page->mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
		(offset % PAGE_SIZE || length != PAGE_SIZE))
		return;

	if (PageDirty(page)) {
		if (inode->i_ino == F2FS_META_INO(sbi)) {
			dec_page_count(sbi, F2FS_DIRTY_META);
		} else if (inode->i_ino == F2FS_NODE_INO(sbi)) {
			dec_page_count(sbi, F2FS_DIRTY_NODES);
		} else {
			inode_dec_dirty_pages(inode);
			f2fs_remove_dirty_inode(inode);
		}
	}

	clear_cold_data(page);

	if (IS_ATOMIC_WRITTEN_PAGE(page))
		return f2fs_drop_inmem_page(inode, page);

	f2fs_clear_page_private(page);
}

int f2fs_release_page(struct page *page, gfp_t wait)
{
	/* If this is dirty page, keep PagePrivate */
	if (PageDirty(page))
		return 0;

	/* This is atomic written page, keep Private */
	if (IS_ATOMIC_WRITTEN_PAGE(page))
		return 0;

	clear_cold_data(page);
	f2fs_clear_page_private(page);
	return 1;
}

static int f2fs_set_data_page_dirty(struct page *page)
{
	struct inode *inode = page_file_mapping(page)->host;

	trace_f2fs_set_page_dirty(page, DATA);

	if (!PageUptodate(page))
		SetPageUptodate(page);
	if (PageSwapCache(page))
		return __set_page_dirty_nobuffers(page);

	if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) {
		if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
			f2fs_register_inmem_page(inode, page);
			return 1;
		}
		/*
		 * This page has already been registered,
		 * so just return here.
		 */
		return 0;
	}

	if (!PageDirty(page)) {
		__set_page_dirty_nobuffers(page);
		f2fs_update_dirty_page(inode, page);
		return 1;
	}
	return 0;
}

static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
{
	struct inode *inode = mapping->host;

	if (f2fs_has_inline_data(inode))
		return 0;

	/* make sure allocating whole blocks */
	if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
		filemap_write_and_wait(mapping);

	return generic_block_bmap(mapping, block, get_data_block_bmap);
}

#ifdef CONFIG_MIGRATION
#include <linux/migrate.h>

int f2fs_migrate_page(struct address_space *mapping,
		struct page *newpage, struct page *page, enum migrate_mode mode)
{
	int rc, extra_count;
	struct f2fs_inode_info *fi = F2FS_I(mapping->host);
	bool atomic_written = IS_ATOMIC_WRITTEN_PAGE(page);

	BUG_ON(PageWriteback(page));

	/* migrating an atomic written page is safe with the inmem_lock held */
	if (atomic_written) {
		if (mode != MIGRATE_SYNC)
			return -EBUSY;
		if (!mutex_trylock(&fi->inmem_lock))
			return -EAGAIN;
	}

	/* one extra reference was held for atomic_write page */
	extra_count = atomic_written ? 1 : 0;
	rc = migrate_page_move_mapping(mapping, newpage,
				page, extra_count);
	if (rc != MIGRATEPAGE_SUCCESS) {
		if (atomic_written)
			mutex_unlock(&fi->inmem_lock);
		return rc;
	}

	if (atomic_written) {
		struct inmem_pages *cur;
		list_for_each_entry(cur, &fi->inmem_pages, list)
			if (cur->page == page) {
				cur->page = newpage;
				break;
			}
		mutex_unlock(&fi->inmem_lock);
		put_page(page);
		get_page(newpage);
	}

	if (PagePrivate(page)) {
		f2fs_set_page_private(newpage, page_private(page));
		f2fs_clear_page_private(page);
	}

	if (mode != MIGRATE_SYNC_NO_COPY)
		migrate_page_copy(newpage, page);
	else
		migrate_page_states(newpage, page);

	return MIGRATEPAGE_SUCCESS;
}
#endif

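/*
 * check_swap_activate() probes the file block by block through bmap():
 * every PAGE_SIZE worth of blocks must be contiguous and page-aligned
 * on disk, and a probe that maps to block 0 (a hole) fails the swapon
 * with -EINVAL.
 */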
#ifdef CONFIG_SWAP
/* Copied from generic_swapfile_activate() to check any holes */
static int check_swap_activate(struct file *swap_file, unsigned int max)
{
	struct address_space *mapping = swap_file->f_mapping;
	struct inode *inode = mapping->host;
	unsigned blocks_per_page;
	unsigned long page_no;
	unsigned blkbits;
	sector_t probe_block;
	sector_t last_block;
	sector_t lowest_block = -1;
	sector_t highest_block = 0;

	blkbits = inode->i_blkbits;
	blocks_per_page = PAGE_SIZE >> blkbits;

	/*
	 * Map all the blocks into the extent list.  This code doesn't try
	 * to be very smart.
	 */
	probe_block = 0;
	page_no = 0;
	last_block = i_size_read(inode) >> blkbits;
	while ((probe_block + blocks_per_page) <= last_block && page_no < max) {
		unsigned block_in_page;
		sector_t first_block;

		cond_resched();

		first_block = bmap(inode, probe_block);
		if (first_block == 0)
			goto bad_bmap;

		/*
		 * It must be PAGE_SIZE aligned on-disk
		 */
		if (first_block & (blocks_per_page - 1)) {
			probe_block++;
			goto reprobe;
		}

		for (block_in_page = 1; block_in_page < blocks_per_page;
					block_in_page++) {
			sector_t block;

			block = bmap(inode, probe_block + block_in_page);
			if (block == 0)
				goto bad_bmap;
			if (block != first_block + block_in_page) {
				/* Discontiguity */
				probe_block++;
				goto reprobe;
			}
		}

		first_block >>= (PAGE_SHIFT - blkbits);
		if (page_no) {	/* exclude the header page */
			if (first_block < lowest_block)
				lowest_block = first_block;
			if (first_block > highest_block)
				highest_block = first_block;
		}

		page_no++;
		probe_block += blocks_per_page;
reprobe:
		continue;
	}
	return 0;

bad_bmap:
	pr_err("swapon: swapfile has holes\n");
	return -EINVAL;
}

static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
				sector_t *span)
{
	struct inode *inode = file_inode(file);
	int ret;

	if (!S_ISREG(inode->i_mode))
		return -EINVAL;

	if (f2fs_readonly(F2FS_I_SB(inode)->sb))
		return -EROFS;

	ret = f2fs_convert_inline_inode(inode);
	if (ret)
		return ret;

	ret = check_swap_activate(file, sis->max);
	if (ret)
		return ret;

	set_inode_flag(inode, FI_PIN_FILE);
	f2fs_precache_extents(inode);
	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
	return 0;
}

static void f2fs_swap_deactivate(struct file *file)
{
	struct inode *inode = file_inode(file);

	clear_inode_flag(inode, FI_PIN_FILE);
}
#else
static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
				sector_t *span)
{
	return -EOPNOTSUPP;
}

static void f2fs_swap_deactivate(struct file *file)
{
}
#endif

const struct address_space_operations f2fs_dblock_aops = {
	.readpage	= f2fs_read_data_page,
	.readpages	= f2fs_read_data_pages,
	.writepage	= f2fs_write_data_page,
	.writepages	= f2fs_write_data_pages,
	.write_begin	= f2fs_write_begin,
	.write_end	= f2fs_write_end,
	.set_page_dirty	= f2fs_set_data_page_dirty,
	.invalidatepage	= f2fs_invalidate_page,
	.releasepage	= f2fs_release_page,
	.direct_IO	= f2fs_direct_IO,
	.bmap		= f2fs_bmap,
	.swap_activate  = f2fs_swap_activate,
	.swap_deactivate = f2fs_swap_deactivate,
#ifdef CONFIG_MIGRATION
	.migratepage    = f2fs_migrate_page,
#endif
};

void f2fs_clear_page_cache_dirty_tag(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	unsigned long flags;

	xa_lock_irqsave(&mapping->i_pages, flags);
	__xa_clear_mark(&mapping->i_pages, page_index(page),
						PAGECACHE_TAG_DIRTY);
	xa_unlock_irqrestore(&mapping->i_pages, flags);
}

int __init f2fs_init_post_read_processing(void)
{
	bio_post_read_ctx_cache = KMEM_CACHE(bio_post_read_ctx, 0);
	if (!bio_post_read_ctx_cache)
		goto fail;
	bio_post_read_ctx_pool =
		mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS,
					 bio_post_read_ctx_cache);
	if (!bio_post_read_ctx_pool)
		goto fail_free_cache;
	return 0;

fail_free_cache:
	kmem_cache_destroy(bio_post_read_ctx_cache);
fail:
	return -ENOMEM;
}

void __exit f2fs_destroy_post_read_processing(void)
{
	mempool_destroy(bio_post_read_ctx_pool);
	kmem_cache_destroy(bio_post_read_ctx_cache);
}