// SPDX-License-Identifier: GPL-2.0
/*
 * fs/f2fs/data.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/swap.h>
#include <linux/prefetch.h>
#include <linux/uio.h>
#include <linux/cleancache.h>
#include <linux/sched/signal.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "trace.h"
#include <trace/events/f2fs.h>

#define NUM_PREALLOC_POST_READ_CTXS	128

static struct kmem_cache *bio_post_read_ctx_cache;
static mempool_t *bio_post_read_ctx_pool;

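/*
 * Tell whether writeback of @page is guaranteed to be persisted by a
 * checkpoint: meta/node pages, directory data, atomic-file and quota
 * file data, and cold data pages. WB_DATA_TYPE() uses this to count
 * such pages as F2FS_WB_CP_DATA rather than F2FS_WB_DATA.
 */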
static bool __is_cp_guaranteed(struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct inode *inode;
	struct f2fs_sb_info *sbi;

	if (!mapping)
		return false;

	inode = mapping->host;
	sbi = F2FS_I_SB(inode);

	if (inode->i_ino == F2FS_META_INO(sbi) ||
			inode->i_ino ==  F2FS_NODE_INO(sbi) ||
			S_ISDIR(inode->i_mode) ||
			(S_ISREG(inode->i_mode) &&
			(f2fs_is_atomic_file(inode) || IS_NOQUOTA(inode))) ||
			is_cold_data(page))
		return true;
	return false;
}

static enum count_type __read_io_type(struct page *page)
{
	struct address_space *mapping = page_file_mapping(page);

	if (mapping) {
		struct inode *inode = mapping->host;
		struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

		if (inode->i_ino == F2FS_META_INO(sbi))
			return F2FS_RD_META;

		if (inode->i_ino == F2FS_NODE_INO(sbi))
			return F2FS_RD_NODE;
	}
	return F2FS_RD_DATA;
}

/* postprocessing steps for read bios */
enum bio_post_read_step {
	STEP_INITIAL = 0,
	STEP_DECRYPT,
};

struct bio_post_read_ctx {
	struct bio *bio;
	struct work_struct work;
	unsigned int cur_step;
	unsigned int enabled_steps;
};

static void __read_end_io(struct bio *bio)
{
	struct page *page;
	struct bio_vec *bv;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bv, bio, iter_all) {
		page = bv->bv_page;

		/* PG_error was set if any post_read step failed */
		if (bio->bi_status || PageError(page)) {
			ClearPageUptodate(page);
			/* will re-read again later */
			ClearPageError(page);
		} else {
			SetPageUptodate(page);
		}
		dec_page_count(F2FS_P_SB(page), __read_io_type(page));
		unlock_page(page);
	}
	if (bio->bi_private)
		mempool_free(bio->bi_private, bio_post_read_ctx_pool);
	bio_put(bio);
}

static void bio_post_read_processing(struct bio_post_read_ctx *ctx);

static void decrypt_work(struct work_struct *work)
{
	struct bio_post_read_ctx *ctx =
		container_of(work, struct bio_post_read_ctx, work);

	fscrypt_decrypt_bio(ctx->bio);

	bio_post_read_processing(ctx);
}

static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
{
	switch (++ctx->cur_step) {
	case STEP_DECRYPT:
		if (ctx->enabled_steps & (1 << STEP_DECRYPT)) {
			INIT_WORK(&ctx->work, decrypt_work);
			fscrypt_enqueue_decrypt_work(&ctx->work);
			return;
		}
		ctx->cur_step++;
		/* fall-through */
	default:
		__read_end_io(ctx->bio);
	}
}

static bool f2fs_bio_post_read_required(struct bio *bio)
{
	return bio->bi_private && !bio->bi_status;
}

static void f2fs_read_end_io(struct bio *bio)
{
	if (time_to_inject(F2FS_P_SB(bio_first_page_all(bio)),
						FAULT_READ_IO)) {
		f2fs_show_injection_info(FAULT_READ_IO);
		bio->bi_status = BLK_STS_IOERR;
	}

	if (f2fs_bio_post_read_required(bio)) {
		struct bio_post_read_ctx *ctx = bio->bi_private;

		ctx->cur_step = STEP_INITIAL;
		bio_post_read_processing(ctx);
		return;
	}

	__read_end_io(bio);
}

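/*
 * Completion handler for write bios: releases dummy pages used for IO
 * alignment, records IO errors (stopping checkpointing when
 * checkpoint-guaranteed data failed to reach disk), and wakes up
 * checkpoint waiters once all F2FS_WB_CP_DATA writeback has drained.
 */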
static void f2fs_write_end_io(struct bio *bio)
{
	struct f2fs_sb_info *sbi = bio->bi_private;
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	if (time_to_inject(sbi, FAULT_WRITE_IO)) {
		f2fs_show_injection_info(FAULT_WRITE_IO);
		bio->bi_status = BLK_STS_IOERR;
	}

	bio_for_each_segment_all(bvec, bio, iter_all) {
		struct page *page = bvec->bv_page;
		enum count_type type = WB_DATA_TYPE(page);

		if (IS_DUMMY_WRITTEN_PAGE(page)) {
			set_page_private(page, (unsigned long)NULL);
			ClearPagePrivate(page);
			unlock_page(page);
			mempool_free(page, sbi->write_io_dummy);

			if (unlikely(bio->bi_status))
				f2fs_stop_checkpoint(sbi, true);
			continue;
		}

		fscrypt_finalize_bounce_page(&page);

		if (unlikely(bio->bi_status)) {
			mapping_set_error(page->mapping, -EIO);
			if (type == F2FS_WB_CP_DATA)
				f2fs_stop_checkpoint(sbi, true);
		}

		f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) &&
					page->index != nid_of_node(page));

		dec_page_count(sbi, type);
		if (f2fs_in_warm_node_list(sbi, page))
			f2fs_del_fsync_node_entry(sbi, page);
		clear_cold_data(page);
		end_page_writeback(page);
	}
	if (!get_pages(sbi, F2FS_WB_CP_DATA) &&
				wq_has_sleeper(&sbi->cp_wait))
		wake_up(&sbi->cp_wait);

	bio_put(bio);
}

/*
 * Return the block device that @blk_addr belongs to; if @bio is given,
 * also remap it onto that device.
 */
struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
				block_t blk_addr, struct bio *bio)
{
	struct block_device *bdev = sbi->sb->s_bdev;
	int i;

	if (f2fs_is_multi_device(sbi)) {
		for (i = 0; i < sbi->s_ndevs; i++) {
			if (FDEV(i).start_blk <= blk_addr &&
			    FDEV(i).end_blk >= blk_addr) {
				blk_addr -= FDEV(i).start_blk;
				bdev = FDEV(i).bdev;
				break;
			}
		}
	}
	if (bio) {
		bio_set_dev(bio, bdev);
		bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
	}
	return bdev;
}

int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	int i;

	if (!f2fs_is_multi_device(sbi))
		return 0;

	for (i = 0; i < sbi->s_ndevs; i++)
		if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
			return i;
	return 0;
}

static bool __same_bdev(struct f2fs_sb_info *sbi,
				block_t blk_addr, struct bio *bio)
{
	struct block_device *b = f2fs_target_device(sbi, blk_addr, NULL);
	return bio->bi_disk == b->bd_disk && bio->bi_partno == b->bd_partno;
}

/*
 * Low-level block read/write IO operations.
 */
static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
				struct writeback_control *wbc,
				int npages, bool is_read,
				enum page_type type, enum temp_type temp)
{
	struct bio *bio;

	bio = f2fs_bio_alloc(sbi, npages, true);

	f2fs_target_device(sbi, blk_addr, bio);
	if (is_read) {
		bio->bi_end_io = f2fs_read_end_io;
		bio->bi_private = NULL;
	} else {
		bio->bi_end_io = f2fs_write_end_io;
		bio->bi_private = sbi;
		bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, type, temp);
	}
	if (wbc)
		wbc_init_bio(wbc, bio);

	return bio;
}

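/*
 * Trace and submit @bio; for DATA/NODE writes under the IO-alignment
 * option, first pad the bio with locked dummy pages up to the
 * F2FS_IO_SIZE boundary so the device sees aligned IO units.
 */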
static inline void __submit_bio(struct f2fs_sb_info *sbi,
				struct bio *bio, enum page_type type)
{
	if (!is_read_io(bio_op(bio))) {
		unsigned int start;

		if (type != DATA && type != NODE)
			goto submit_io;

		if (test_opt(sbi, LFS) && current->plug)
			blk_finish_plug(current->plug);

		start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
		start %= F2FS_IO_SIZE(sbi);

		if (start == 0)
			goto submit_io;

		/* fill dummy pages */
		for (; start < F2FS_IO_SIZE(sbi); start++) {
			struct page *page =
				mempool_alloc(sbi->write_io_dummy,
					      GFP_NOIO | __GFP_NOFAIL);
			f2fs_bug_on(sbi, !page);

			zero_user_segment(page, 0, PAGE_SIZE);
			SetPagePrivate(page);
			set_page_private(page, (unsigned long)DUMMY_WRITTEN_PAGE);
			lock_page(page);
			if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
				f2fs_bug_on(sbi, 1);
		}
		/*
		 * In the NODE case, we lose next block address chain. So, we
		 * need to do checkpoint in f2fs_sync_file.
		 */
		if (type == NODE)
			set_sbi_flag(sbi, SBI_NEED_CP);
	}
submit_io:
	if (is_read_io(bio_op(bio)))
		trace_f2fs_submit_read_bio(sbi->sb, type, bio);
	else
		trace_f2fs_submit_write_bio(sbi->sb, type, bio);
	submit_bio(bio);
}

static void __submit_merged_bio(struct f2fs_bio_info *io)
{
	struct f2fs_io_info *fio = &io->fio;

	if (!io->bio)
		return;

	bio_set_op_attrs(io->bio, fio->op, fio->op_flags);

	if (is_read_io(fio->op))
		trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
	else
		trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio);

	__submit_bio(io->sbi, io->bio, fio->type);
	io->bio = NULL;
}

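/*
 * Check whether @bio already carries a page that belongs to @inode,
 * matches @page, or comes from node @ino; with no filter given, any
 * non-empty bio counts as a match.
 */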
static bool __has_merged_page(struct bio *bio, struct inode *inode,
						struct page *page, nid_t ino)
{
	struct bio_vec *bvec;
	struct page *target;
	struct bvec_iter_all iter_all;

	if (!bio)
		return false;

	if (!inode && !page && !ino)
		return true;

	bio_for_each_segment_all(bvec, bio, iter_all) {

		target = bvec->bv_page;
		if (fscrypt_is_bounce_page(target))
			target = fscrypt_pagecache_page(target);
		if (inode && inode == target->mapping->host)
			return true;
		if (page && page == target)
			return true;
		if (ino && ino == ino_of_node(target))
			return true;
	}

	return false;
}

static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
				enum page_type type, enum temp_type temp)
{
	enum page_type btype = PAGE_TYPE_OF_BIO(type);
	struct f2fs_bio_info *io = sbi->write_io[btype] + temp;

	down_write(&io->io_rwsem);

	/* change META to META_FLUSH in the checkpoint procedure */
	if (type >= META_FLUSH) {
		io->fio.type = META_FLUSH;
		io->fio.op = REQ_OP_WRITE;
		io->fio.op_flags = REQ_META | REQ_PRIO | REQ_SYNC;
		if (!test_opt(sbi, NOBARRIER))
			io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
	}
	__submit_merged_bio(io);
	up_write(&io->io_rwsem);
}

static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
				struct inode *inode, struct page *page,
				nid_t ino, enum page_type type, bool force)
{
	enum temp_type temp;
	bool ret = true;

	for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
		if (!force) {
			enum page_type btype = PAGE_TYPE_OF_BIO(type);
			struct f2fs_bio_info *io = sbi->write_io[btype] + temp;

			down_read(&io->io_rwsem);
			ret = __has_merged_page(io->bio, inode, page, ino);
			up_read(&io->io_rwsem);
		}
		if (ret)
			__f2fs_submit_merged_write(sbi, type, temp);

		/* TODO: use HOT temp only for meta pages now. */
		if (type >= META)
			break;
	}
}

void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
{
	__submit_merged_write_cond(sbi, NULL, NULL, 0, type, true);
}

void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
				struct inode *inode, struct page *page,
				nid_t ino, enum page_type type)
{
	__submit_merged_write_cond(sbi, inode, page, ino, type, false);
}

void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
{
	f2fs_submit_merged_write(sbi, DATA);
	f2fs_submit_merged_write(sbi, NODE);
	f2fs_submit_merged_write(sbi, META);
}

/*
 * Fill the locked page with data located in the block address.
 * A caller needs to unlock the page on failure.
 */
int f2fs_submit_page_bio(struct f2fs_io_info *fio)
{
	struct bio *bio;
	struct page *page = fio->encrypted_page ?
			fio->encrypted_page : fio->page;

	if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
			fio->is_por ? META_POR : (__is_meta_io(fio) ?
			META_GENERIC : DATA_GENERIC_ENHANCE)))
		return -EFSCORRUPTED;

	trace_f2fs_submit_page_bio(page, fio);
	f2fs_trace_ios(fio, 0);

	/* Allocate a new bio */
	bio = __bio_alloc(fio->sbi, fio->new_blkaddr, fio->io_wbc,
				1, is_read_io(fio->op), fio->type, fio->temp);

	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
		bio_put(bio);
		return -EFAULT;
	}

	if (fio->io_wbc && !is_read_io(fio->op))
		wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE);

	bio_set_op_attrs(bio, fio->op, fio->op_flags);

	inc_page_count(fio->sbi, is_read_io(fio->op) ?
			__read_io_type(page) : WB_DATA_TYPE(fio->page));

	__submit_bio(fio->sbi, bio, fio->type);
	return 0;
}

static bool page_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
				block_t last_blkaddr, block_t cur_blkaddr)
{
	if (last_blkaddr + 1 != cur_blkaddr)
		return false;
	return __same_bdev(sbi, cur_blkaddr, bio);
}

static bool io_type_is_mergeable(struct f2fs_bio_info *io,
						struct f2fs_io_info *fio)
{
	if (io->fio.op != fio->op)
		return false;
	return io->fio.op_flags == fio->op_flags;
}

static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
					struct f2fs_bio_info *io,
					struct f2fs_io_info *fio,
					block_t last_blkaddr,
					block_t cur_blkaddr)
{
	if (F2FS_IO_ALIGNED(sbi) && (fio->type == DATA || fio->type == NODE)) {
		unsigned int filled_blocks =
				F2FS_BYTES_TO_BLK(bio->bi_iter.bi_size);
		unsigned int io_size = F2FS_IO_SIZE(sbi);
		unsigned int left_vecs = bio->bi_max_vecs - bio->bi_vcnt;

		/* IOs in the bio are aligned and the remaining vector space is not enough */
		if (!(filled_blocks % io_size) && left_vecs < io_size)
			return false;
	}
	if (!page_is_mergeable(sbi, bio, last_blkaddr, cur_blkaddr))
		return false;
	return io_type_is_mergeable(io, fio);
}

int f2fs_merge_page_bio(struct f2fs_io_info *fio)
{
	struct bio *bio = *fio->bio;
	struct page *page = fio->encrypted_page ?
			fio->encrypted_page : fio->page;

	if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
			__is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
		return -EFSCORRUPTED;

	trace_f2fs_submit_page_bio(page, fio);
	f2fs_trace_ios(fio, 0);

	if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block,
						fio->new_blkaddr)) {
		__submit_bio(fio->sbi, bio, fio->type);
		bio = NULL;
	}
alloc_new:
	if (!bio) {
		bio = __bio_alloc(fio->sbi, fio->new_blkaddr, fio->io_wbc,
				BIO_MAX_PAGES, false, fio->type, fio->temp);
		bio_set_op_attrs(bio, fio->op, fio->op_flags);
	}

	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
		__submit_bio(fio->sbi, bio, fio->type);
		bio = NULL;
		goto alloc_new;
	}

	if (fio->io_wbc)
		wbc_account_cgroup_owner(fio->io_wbc, page, PAGE_SIZE);

	inc_page_count(fio->sbi, WB_DATA_TYPE(page));

	*fio->last_block = fio->new_blkaddr;
	*fio->bio = bio;

	return 0;
}

static void f2fs_submit_ipu_bio(struct f2fs_sb_info *sbi, struct bio **bio,
							struct page *page)
{
	if (!bio)
		return;

	if (!__has_merged_page(*bio, NULL, page, 0))
		return;

	__submit_bio(sbi, *bio, DATA);
	*bio = NULL;
}

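/*
 * Queue @fio into the per-{type, temperature} write stream: merge it
 * into the pending bio when contiguous, and submit that bio once it
 * can no longer grow.
 */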
void f2fs_submit_page_write(struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
	struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
	struct page *bio_page;

	f2fs_bug_on(sbi, is_read_io(fio->op));

	down_write(&io->io_rwsem);
next:
	if (fio->in_list) {
		spin_lock(&io->io_lock);
		if (list_empty(&io->io_list)) {
			spin_unlock(&io->io_lock);
			goto out;
		}
		fio = list_first_entry(&io->io_list,
						struct f2fs_io_info, list);
		list_del(&fio->list);
		spin_unlock(&io->io_lock);
	}

	verify_fio_blkaddr(fio);

	bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;

	/* set submitted = true as a return value */
	fio->submitted = true;

	inc_page_count(sbi, WB_DATA_TYPE(bio_page));

	if (io->bio && !io_is_mergeable(sbi, io->bio, io, fio,
			io->last_block_in_bio, fio->new_blkaddr))
		__submit_merged_bio(io);
alloc_new:
	if (io->bio == NULL) {
		if ((fio->type == DATA || fio->type == NODE) &&
				fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
			dec_page_count(sbi, WB_DATA_TYPE(bio_page));
			fio->retry = true;
			goto skip;
		}
		io->bio = __bio_alloc(sbi, fio->new_blkaddr, fio->io_wbc,
						BIO_MAX_PAGES, false,
						fio->type, fio->temp);
		io->fio = *fio;
	}

	if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) {
		__submit_merged_bio(io);
		goto alloc_new;
	}

	if (fio->io_wbc)
		wbc_account_cgroup_owner(fio->io_wbc, bio_page, PAGE_SIZE);

	io->last_block_in_bio = fio->new_blkaddr;
	f2fs_trace_ios(fio, 0);

	trace_f2fs_submit_page_write(fio->page, fio);
skip:
	if (fio->in_list)
		goto next;
out:
	if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
				f2fs_is_checkpoint_ready(sbi))
		__submit_merged_bio(io);
	up_write(&io->io_rwsem);
}

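/*
 * Allocate a read bio targeting @blkaddr; for encrypted files, attach
 * a post-read context so the data is decrypted on completion.
 */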
static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
					unsigned nr_pages, unsigned op_flag)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct bio *bio;
	struct bio_post_read_ctx *ctx;
	unsigned int post_read_steps = 0;

	bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false);
	if (!bio)
		return ERR_PTR(-ENOMEM);
	f2fs_target_device(sbi, blkaddr, bio);
	bio->bi_end_io = f2fs_read_end_io;
	bio_set_op_attrs(bio, REQ_OP_READ, op_flag);

	if (f2fs_encrypted_file(inode))
		post_read_steps |= 1 << STEP_DECRYPT;
	if (post_read_steps) {
		ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
		if (!ctx) {
			bio_put(bio);
			return ERR_PTR(-ENOMEM);
		}
		ctx->bio = bio;
		ctx->enabled_steps = post_read_steps;
		bio->bi_private = ctx;
	}

	return bio;
}

/* This can handle encryption stuffs */
static int f2fs_submit_page_read(struct inode *inode, struct page *page,
							block_t blkaddr)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct bio *bio;

	bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0);
	if (IS_ERR(bio))
		return PTR_ERR(bio);

	/* wait for GCed page writeback via META_MAPPING */
	f2fs_wait_on_block_writeback(inode, blkaddr);

	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
		bio_put(bio);
		return -EFAULT;
	}
	ClearPageError(page);
	inc_page_count(sbi, F2FS_RD_DATA);
	__submit_bio(sbi, bio, DATA);
	return 0;
}

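/* Write dn->data_blkaddr into the address slot of the on-disk node page. */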
static void __set_data_blkaddr(struct dnode_of_data *dn)
{
	struct f2fs_node *rn = F2FS_NODE(dn->node_page);
	__le32 *addr_array;
	int base = 0;

	if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode))
		base = get_extra_isize(dn->inode);

	/* Get physical address of data block */
	addr_array = blkaddr_in_node(rn);
	addr_array[base + dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
}

/*
 * Lock ordering for the change of data block address:
 * ->data_page
 *  ->node_page
 *    update block addresses in the node page
 */
void f2fs_set_data_blkaddr(struct dnode_of_data *dn)
{
	f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
	__set_data_blkaddr(dn);
	if (set_page_dirty(dn->node_page))
		dn->node_changed = true;
}

void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
{
	dn->data_blkaddr = blkaddr;
	f2fs_set_data_blkaddr(dn);
	f2fs_update_extent_cache(dn);
}

/* dn->ofs_in_node will be returned with up-to-date last block pointer */
int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	int err;

	if (!count)
		return 0;

	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
		return -EPERM;
	if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
		return err;

	trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
						dn->ofs_in_node, count);

	f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);

	for (; count > 0; dn->ofs_in_node++) {
		block_t blkaddr = datablock_addr(dn->inode,
					dn->node_page, dn->ofs_in_node);
		if (blkaddr == NULL_ADDR) {
			dn->data_blkaddr = NEW_ADDR;
			__set_data_blkaddr(dn);
			count--;
		}
	}

	if (set_page_dirty(dn->node_page))
		dn->node_changed = true;
	return 0;
}

/* Should keep dn->ofs_in_node unchanged */
int f2fs_reserve_new_block(struct dnode_of_data *dn)
{
	unsigned int ofs_in_node = dn->ofs_in_node;
	int ret;

	ret = f2fs_reserve_new_blocks(dn, 1);
	dn->ofs_in_node = ofs_in_node;
	return ret;
}

int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
{
	bool need_put = dn->inode_page ? false : true;
	int err;

	err = f2fs_get_dnode_of_data(dn, index, ALLOC_NODE);
	if (err)
		return err;

	if (dn->data_blkaddr == NULL_ADDR)
		err = f2fs_reserve_new_block(dn);
	if (err || need_put)
		f2fs_put_dnode(dn);
	return err;
}

int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
{
	struct extent_info ei = {0,0,0};
	struct inode *inode = dn->inode;

	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
		dn->data_blkaddr = ei.blk + index - ei.fofs;
		return 0;
	}

	return f2fs_reserve_block(dn, index);
}

struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
						int op_flags, bool for_write)
{
	struct address_space *mapping = inode->i_mapping;
	struct dnode_of_data dn;
	struct page *page;
	struct extent_info ei = {0,0,0};
	int err;

	page = f2fs_grab_cache_page(mapping, index, for_write);
	if (!page)
		return ERR_PTR(-ENOMEM);

	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
		dn.data_blkaddr = ei.blk + index - ei.fofs;
		if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr,
						DATA_GENERIC_ENHANCE_READ)) {
			err = -EFSCORRUPTED;
			goto put_err;
		}
		goto got_it;
	}

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
	if (err)
		goto put_err;
	f2fs_put_dnode(&dn);

	if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
		err = -ENOENT;
		goto put_err;
	}
	if (dn.data_blkaddr != NEW_ADDR &&
			!f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
						dn.data_blkaddr,
						DATA_GENERIC_ENHANCE)) {
		err = -EFSCORRUPTED;
		goto put_err;
	}
got_it:
	if (PageUptodate(page)) {
		unlock_page(page);
		return page;
	}

	/*
	 * A new dentry page is allocated but not able to be written, since its
	 * new inode page couldn't be allocated due to -ENOSPC.
	 * In such the case, its blkaddr can be remained as NEW_ADDR.
	 * see, f2fs_add_link -> f2fs_get_new_data_page ->
	 * f2fs_init_inode_metadata.
	 */
	if (dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_SIZE);
		if (!PageUptodate(page))
			SetPageUptodate(page);
		unlock_page(page);
		return page;
	}

	err = f2fs_submit_page_read(inode, page, dn.data_blkaddr);
	if (err)
		goto put_err;
	return page;

put_err:
	f2fs_put_page(page, 1);
	return ERR_PTR(err);
}

struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;

	page = find_get_page(mapping, index);
	if (page && PageUptodate(page))
		return page;
	f2fs_put_page(page, 0);

	page = f2fs_get_read_data_page(inode, index, 0, false);
	if (IS_ERR(page))
		return page;

	if (PageUptodate(page))
		return page;

	wait_on_page_locked(page);
	if (unlikely(!PageUptodate(page))) {
		f2fs_put_page(page, 0);
		return ERR_PTR(-EIO);
	}
	return page;
}

/*
 * If it tries to access a hole, return an error.
 * Because, the callers, functions in dir.c and GC, should be able to know
 * whether this page exists or not.
 */
struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
							bool for_write)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
repeat:
	page = f2fs_get_read_data_page(inode, index, 0, for_write);
	if (IS_ERR(page))
		return page;

	/* wait for read completion */
	lock_page(page);
	if (unlikely(page->mapping != mapping)) {
		f2fs_put_page(page, 1);
		goto repeat;
	}
	if (unlikely(!PageUptodate(page))) {
		f2fs_put_page(page, 1);
		return ERR_PTR(-EIO);
	}
	return page;
}

/*
 * Caller ensures that this data page is never allocated.
 * A new zero-filled data page is allocated in the page cache.
 *
 * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
 * f2fs_unlock_op().
 * Note that, ipage is set only by make_empty_dir, and if any error occurs,
 * ipage should be released by this function.
 */
struct page *f2fs_get_new_data_page(struct inode *inode,
		struct page *ipage, pgoff_t index, bool new_i_size)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
	struct dnode_of_data dn;
	int err;

	page = f2fs_grab_cache_page(mapping, index, true);
	if (!page) {
		/*
		 * before exiting, we should make sure ipage will be released
		 * if any error occurs.
		 */
		f2fs_put_page(ipage, 1);
		return ERR_PTR(-ENOMEM);
	}

	set_new_dnode(&dn, inode, ipage, NULL, 0);
	err = f2fs_reserve_block(&dn, index);
	if (err) {
		f2fs_put_page(page, 1);
		return ERR_PTR(err);
	}
	if (!ipage)
		f2fs_put_dnode(&dn);

	if (PageUptodate(page))
		goto got_it;

	if (dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_SIZE);
		if (!PageUptodate(page))
			SetPageUptodate(page);
	} else {
		f2fs_put_page(page, 1);

		/* if ipage exists, blkaddr should be NEW_ADDR */
		f2fs_bug_on(F2FS_I_SB(inode), ipage);
		page = f2fs_get_lock_data_page(inode, index, true);
		if (IS_ERR(page))
			return page;
	}
got_it:
	if (new_i_size && i_size_read(inode) <
				((loff_t)(index + 1) << PAGE_SHIFT))
		f2fs_i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT));
	return page;
}

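/*
 * Allocate a new data block for @dn via the segment allocator and point
 * the dnode at it; any stale copy of the old block cached in
 * META_MAPPING is invalidated.
 */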
static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	struct f2fs_summary sum;
	struct node_info ni;
	block_t old_blkaddr;
	blkcnt_t count = 1;
	int err;

	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
		return -EPERM;

	err = f2fs_get_node_info(sbi, dn->nid, &ni);
	if (err)
		return err;

	dn->data_blkaddr = datablock_addr(dn->inode,
				dn->node_page, dn->ofs_in_node);
	if (dn->data_blkaddr != NULL_ADDR)
		goto alloc;

	if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
		return err;

alloc:
	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
	old_blkaddr = dn->data_blkaddr;
	f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
					&sum, seg_type, NULL, false);
	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
		invalidate_mapping_pages(META_MAPPING(sbi),
					old_blkaddr, old_blkaddr);
	f2fs_set_data_blkaddr(dn);

	/*
	 * i_size will be updated by direct_IO. Otherwise, we'll get stale
	 * data from unwritten block via dio_read.
	 */
	return 0;
}

int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct f2fs_map_blocks map;
	int flag;
	int err = 0;
	bool direct_io = iocb->ki_flags & IOCB_DIRECT;

	/* convert inline data for Direct I/O */
	if (direct_io) {
		err = f2fs_convert_inline_inode(inode);
		if (err)
			return err;
	}

	if (direct_io && allow_outplace_dio(inode, iocb, from))
		return 0;

	if (is_inode_flag_set(inode, FI_NO_PREALLOC))
		return 0;

	map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
	map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from));
	if (map.m_len > map.m_lblk)
		map.m_len -= map.m_lblk;
	else
		map.m_len = 0;

	map.m_next_pgofs = NULL;
	map.m_next_extent = NULL;
	map.m_seg_type = NO_CHECK_TYPE;
	map.m_may_create = true;

	if (direct_io) {
		map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint);
		flag = f2fs_force_buffered_io(inode, iocb, from) ?
					F2FS_GET_BLOCK_PRE_AIO :
					F2FS_GET_BLOCK_PRE_DIO;
		goto map_blocks;
	}
	if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) {
		err = f2fs_convert_inline_inode(inode);
		if (err)
			return err;
	}
	if (f2fs_has_inline_data(inode))
		return err;

	flag = F2FS_GET_BLOCK_PRE_AIO;

map_blocks:
	err = f2fs_map_blocks(inode, &map, 1, flag);
	if (map.m_len > 0 && err == -ENOSPC) {
		if (!direct_io)
			set_inode_flag(inode, FI_NO_PREALLOC);
		err = 0;
	}
	return err;

C
1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109
{
	if (flag == F2FS_GET_BLOCK_PRE_AIO) {
		if (lock)
			down_read(&sbi->node_change);
		else
			up_read(&sbi->node_change);
	} else {
		if (lock)
			f2fs_lock_op(sbi);
		else
			f2fs_unlock_op(sbi);
	}
}

/*
 * f2fs_map_blocks() now supported readahead/bmap/rw direct_IO with
 * f2fs_map_blocks structure.
 * If original data blocks are allocated, then give them to blockdev.
 * Otherwise,
 *     a. preallocate requested block addresses
 *     b. do not use extent cache for better performance
 *     c. give the block addresses to blockdev
 */
int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
						int create, int flag)
{
	unsigned int maxblocks = map->m_len;
	struct dnode_of_data dn;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	int mode = map->m_may_create ? ALLOC_NODE : LOOKUP_NODE;
	pgoff_t pgofs, end_offset, end;
	int err = 0, ofs = 1;
	unsigned int ofs_in_node, last_ofs_in_node;
	blkcnt_t prealloc;
	struct extent_info ei = {0,0,0};
	block_t blkaddr;
	unsigned int start_pgofs;

	if (!maxblocks)
		return 0;

	map->m_len = 0;
	map->m_flags = 0;

	/* it only supports block size == page size */
	pgofs = (pgoff_t)map->m_lblk;
	end = pgofs + maxblocks;

	if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
		if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO &&
							map->m_may_create)
			goto next_dnode;

		map->m_pblk = ei.blk + pgofs - ei.fofs;
		map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
		map->m_flags = F2FS_MAP_MAPPED;
		if (map->m_next_extent)
			*map->m_next_extent = pgofs + map->m_len;

		/* for hardware encryption, but to avoid potential issue in future */
		if (flag == F2FS_GET_BLOCK_DIO)
			f2fs_wait_on_block_writeback_range(inode,
						map->m_pblk, map->m_len);
		goto out;
	}

next_dnode:
	if (map->m_may_create)
		__do_map_lock(sbi, flag, true);

	/* When reading holes, we need its node page */
	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_get_dnode_of_data(&dn, pgofs, mode);
	if (err) {
		if (flag == F2FS_GET_BLOCK_BMAP)
			map->m_pblk = 0;
		if (err == -ENOENT) {
			err = 0;
			if (map->m_next_pgofs)
				*map->m_next_pgofs =
					f2fs_get_next_page_offset(&dn, pgofs);
			if (map->m_next_extent)
				*map->m_next_extent =
					f2fs_get_next_page_offset(&dn, pgofs);
		}
		goto unlock_out;
	}

	start_pgofs = pgofs;
	prealloc = 0;
	last_ofs_in_node = ofs_in_node = dn.ofs_in_node;
	end_offset = ADDRS_PER_PAGE(dn.node_page, inode);

next_block:
	blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node);

	if (__is_valid_data_blkaddr(blkaddr) &&
		!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
		err = -EFSCORRUPTED;
		goto sync_out;
	}

	if (__is_valid_data_blkaddr(blkaddr)) {
		/* use out-place-update for direct IO under LFS mode */
		if (test_opt(sbi, LFS) && flag == F2FS_GET_BLOCK_DIO &&
							map->m_may_create) {
			err = __allocate_data_block(&dn, map->m_seg_type);
			if (!err) {
				blkaddr = dn.data_blkaddr;
				set_inode_flag(inode, FI_APPEND_WRITE);
			}
		}
	} else {
		if (create) {
			if (unlikely(f2fs_cp_error(sbi))) {
				err = -EIO;
				goto sync_out;
			}
			if (flag == F2FS_GET_BLOCK_PRE_AIO) {
				if (blkaddr == NULL_ADDR) {
					prealloc++;
					last_ofs_in_node = dn.ofs_in_node;
				}
			} else {
				WARN_ON(flag != F2FS_GET_BLOCK_PRE_DIO &&
					flag != F2FS_GET_BLOCK_DIO);
				err = __allocate_data_block(&dn,
							map->m_seg_type);
				if (!err)
					set_inode_flag(inode, FI_APPEND_WRITE);
			}
			if (err)
				goto sync_out;
			map->m_flags |= F2FS_MAP_NEW;
			blkaddr = dn.data_blkaddr;
		} else {
			if (flag == F2FS_GET_BLOCK_BMAP) {
				map->m_pblk = 0;
				goto sync_out;
			}
			if (flag == F2FS_GET_BLOCK_PRECACHE)
				goto sync_out;
			if (flag == F2FS_GET_BLOCK_FIEMAP &&
						blkaddr == NULL_ADDR) {
				if (map->m_next_pgofs)
					*map->m_next_pgofs = pgofs + 1;
				goto sync_out;
			}
			if (flag != F2FS_GET_BLOCK_FIEMAP) {
				/* for defragment case */
				if (map->m_next_pgofs)
					*map->m_next_pgofs = pgofs + 1;
				goto sync_out;
			}
		}
	}

	if (flag == F2FS_GET_BLOCK_PRE_AIO)
		goto skip;

	if (map->m_len == 0) {
		/* preallocated unwritten block should be mapped for fiemap. */
		if (blkaddr == NEW_ADDR)
			map->m_flags |= F2FS_MAP_UNWRITTEN;
		map->m_flags |= F2FS_MAP_MAPPED;

		map->m_pblk = blkaddr;
		map->m_len = 1;
	} else if ((map->m_pblk != NEW_ADDR &&
			blkaddr == (map->m_pblk + ofs)) ||
			(map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
			flag == F2FS_GET_BLOCK_PRE_DIO) {
		ofs++;
		map->m_len++;
	} else {
		goto sync_out;
	}

skip:
	dn.ofs_in_node++;
	pgofs++;

	/* preallocate blocks in batch for one dnode page */
	if (flag == F2FS_GET_BLOCK_PRE_AIO &&
			(pgofs == end || dn.ofs_in_node == end_offset)) {

		dn.ofs_in_node = ofs_in_node;
		err = f2fs_reserve_new_blocks(&dn, prealloc);
		if (err)
			goto sync_out;

		map->m_len += dn.ofs_in_node - ofs_in_node;
		if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
			err = -ENOSPC;
			goto sync_out;
		}
		dn.ofs_in_node = end_offset;
	}

	if (pgofs >= end)
		goto sync_out;
	else if (dn.ofs_in_node < end_offset)
		goto next_block;

	if (flag == F2FS_GET_BLOCK_PRECACHE) {
		if (map->m_flags & F2FS_MAP_MAPPED) {
			unsigned int ofs = start_pgofs - map->m_lblk;

			f2fs_update_extent_cache_range(&dn,
				start_pgofs, map->m_pblk + ofs,
				map->m_len - ofs);
		}
	}

	f2fs_put_dnode(&dn);

	if (map->m_may_create) {
		__do_map_lock(sbi, flag, false);
		f2fs_balance_fs(sbi, dn.node_changed);
	}
	goto next_dnode;

sync_out:

	/* for hardware encryption, but to avoid potential issue in future */
	if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED)
		f2fs_wait_on_block_writeback_range(inode,
						map->m_pblk, map->m_len);

	if (flag == F2FS_GET_BLOCK_PRECACHE) {
		if (map->m_flags & F2FS_MAP_MAPPED) {
			unsigned int ofs = start_pgofs - map->m_lblk;

			f2fs_update_extent_cache_range(&dn,
				start_pgofs, map->m_pblk + ofs,
				map->m_len - ofs);
		}
		if (map->m_next_extent)
			*map->m_next_extent = pgofs + 1;
	}
	f2fs_put_dnode(&dn);
unlock_out:
	if (map->m_may_create) {
		__do_map_lock(sbi, flag, false);
		f2fs_balance_fs(sbi, dn.node_changed);
	}
out:
	trace_f2fs_map_blocks(inode, map, err);
	return err;
}

bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len)
{
	struct f2fs_map_blocks map;
	block_t last_lblk;
	int err;

	if (pos + len > i_size_read(inode))
		return false;

	map.m_lblk = F2FS_BYTES_TO_BLK(pos);
	map.m_next_pgofs = NULL;
	map.m_next_extent = NULL;
	map.m_seg_type = NO_CHECK_TYPE;
	map.m_may_create = false;
	last_lblk = F2FS_BLK_ALIGN(pos + len);

	while (map.m_lblk < last_lblk) {
		map.m_len = last_lblk - map.m_lblk;
		err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT);
		if (err || map.m_len == 0)
			return false;
		map.m_lblk += map.m_len;
	}
	return true;
}

static int __get_data_block(struct inode *inode, sector_t iblock,
			struct buffer_head *bh, int create, int flag,
			pgoff_t *next_pgofs, int seg_type, bool may_write)
{
	struct f2fs_map_blocks map;
	int err;

	map.m_lblk = iblock;
	map.m_len = bh->b_size >> inode->i_blkbits;
	map.m_next_pgofs = next_pgofs;
	map.m_next_extent = NULL;
	map.m_seg_type = seg_type;
	map.m_may_create = may_write;

	err = f2fs_map_blocks(inode, &map, create, flag);
	if (!err) {
		map_bh(bh, inode->i_sb, map.m_pblk);
		bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
		bh->b_size = (u64)map.m_len << inode->i_blkbits;
	}
	return err;
}

static int get_data_block(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create, int flag,
			pgoff_t *next_pgofs)
{
	return __get_data_block(inode, iblock, bh_result, create,
							flag, next_pgofs,
							NO_CHECK_TYPE, create);
}

static int get_data_block_dio_write(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create)
{
	return __get_data_block(inode, iblock, bh_result, create,
				F2FS_GET_BLOCK_DIO, NULL,
				f2fs_rw_hint_to_seg_type(inode->i_write_hint),
				IS_SWAPFILE(inode) ? false : true);
}

static int get_data_block_dio(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create)
{
	return __get_data_block(inode, iblock, bh_result, create,
				F2FS_GET_BLOCK_DIO, NULL,
				f2fs_rw_hint_to_seg_type(inode->i_write_hint),
				false);
}

static int get_data_block_bmap(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create)
{
	/* Block number less than F2FS MAX BLOCKS */
	if (unlikely(iblock >= F2FS_I_SB(inode)->max_file_blocks))
		return -EFBIG;

	return __get_data_block(inode, iblock, bh_result, create,
						F2FS_GET_BLOCK_BMAP, NULL,
						NO_CHECK_TYPE, create);
}

static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
{
	return (offset >> inode->i_blkbits);
}

static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
{
	return (blk << inode->i_blkbits);
}

static int f2fs_xattr_fiemap(struct inode *inode,
				struct fiemap_extent_info *fieinfo)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct page *page;
	struct node_info ni;
	__u64 phys = 0, len;
	__u32 flags;
	nid_t xnid = F2FS_I(inode)->i_xattr_nid;
	int err = 0;

	if (f2fs_has_inline_xattr(inode)) {
		int offset;

		page = f2fs_grab_cache_page(NODE_MAPPING(sbi),
						inode->i_ino, false);
		if (!page)
			return -ENOMEM;

		err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
		if (err) {
			f2fs_put_page(page, 1);
			return err;
		}

		phys = (__u64)blk_to_logical(inode, ni.blk_addr);
		offset = offsetof(struct f2fs_inode, i_addr) +
					sizeof(__le32) * (DEF_ADDRS_PER_INODE -
1473
					get_inline_xattr_addrs(inode));
C
		phys += offset;
		len = inline_xattr_size(inode);

		f2fs_put_page(page, 1);

		flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED;

		if (!xnid)
			flags |= FIEMAP_EXTENT_LAST;

		err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
		if (err || err == 1)
			return err;
	}

	if (xnid) {
		page = f2fs_grab_cache_page(NODE_MAPPING(sbi), xnid, false);
		if (!page)
			return -ENOMEM;

		err = f2fs_get_node_info(sbi, xnid, &ni);
		if (err) {
			f2fs_put_page(page, 1);
			return err;
		}

		phys = (__u64)blk_to_logical(inode, ni.blk_addr);
		len = inode->i_sb->s_blocksize;

		f2fs_put_page(page, 1);

		flags = FIEMAP_EXTENT_LAST;
	}

	if (phys)
		err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);

	return (err < 0 ? err : 0);
}

int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		u64 start, u64 len)
{
	struct buffer_head map_bh;
	sector_t start_blk, last_blk;
	pgoff_t next_pgofs;
	u64 logical = 0, phys = 0, size = 0;
	u32 flags = 0;
	int ret = 0;

	if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
		ret = f2fs_precache_extents(inode);
		if (ret)
			return ret;
	}

	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR);
	if (ret)
		return ret;

	inode_lock(inode);

	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
		ret = f2fs_xattr_fiemap(inode, fieinfo);
		goto out;
	}

	if (f2fs_has_inline_data(inode)) {
		ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
		if (ret != -EAGAIN)
			goto out;
	}

	if (logical_to_blk(inode, len) == 0)
		len = blk_to_logical(inode, 1);

	start_blk = logical_to_blk(inode, start);
	last_blk = logical_to_blk(inode, start + len - 1);

next:
	memset(&map_bh, 0, sizeof(struct buffer_head));
	map_bh.b_size = len;

	ret = get_data_block(inode, start_blk, &map_bh, 0,
					F2FS_GET_BLOCK_FIEMAP, &next_pgofs);
	if (ret)
		goto out;

	/* HOLE */
	if (!buffer_mapped(&map_bh)) {
		start_blk = next_pgofs;

		if (blk_to_logical(inode, start_blk) < blk_to_logical(inode,
					F2FS_I_SB(inode)->max_file_blocks))
			goto prep_next;

		flags |= FIEMAP_EXTENT_LAST;
	}

	if (size) {
		if (IS_ENCRYPTED(inode))
			flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;

		ret = fiemap_fill_next_extent(fieinfo, logical,
				phys, size, flags);
	}

	if (start_blk > last_blk || ret)
		goto out;

	logical = blk_to_logical(inode, start_blk);
	phys = blk_to_logical(inode, map_bh.b_blocknr);
	size = map_bh.b_size;
	flags = 0;
	if (buffer_unwritten(&map_bh))
		flags = FIEMAP_EXTENT_UNWRITTEN;

	start_blk += logical_to_blk(inode, size);

prep_next:
	cond_resched();
	if (fatal_signal_pending(current))
		ret = -EINTR;
	else
		goto next;
out:
	if (ret == 1)
		ret = 0;

	inode_unlock(inode);
	return ret;
}

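/*
 * Read one data page: map its block, zero it out when beyond EOF, and
 * either merge it into the caller's read bio or submit that bio and
 * start a new one.
 */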
static int f2fs_read_single_page(struct inode *inode, struct page *page,
					unsigned nr_pages,
					struct f2fs_map_blocks *map,
					struct bio **bio_ret,
					sector_t *last_block_in_bio,
					bool is_readahead)
{
	struct bio *bio = *bio_ret;
	const unsigned blkbits = inode->i_blkbits;
	const unsigned blocksize = 1 << blkbits;
	sector_t block_in_file;
	sector_t last_block;
	sector_t last_block_in_file;
	sector_t block_nr;
	int ret = 0;

	block_in_file = (sector_t)page_index(page);
	last_block = block_in_file + nr_pages;
	last_block_in_file = (i_size_read(inode) + blocksize - 1) >>
							blkbits;
	if (last_block > last_block_in_file)
		last_block = last_block_in_file;

	/* just zeroing out page which is beyond EOF */
	if (block_in_file >= last_block)
		goto zero_out;
	/*
	 * Map blocks using the previous result first.
	 */
	if ((map->m_flags & F2FS_MAP_MAPPED) &&
			block_in_file > map->m_lblk &&
			block_in_file < (map->m_lblk + map->m_len))
		goto got_it;

	/*
	 * Then do more f2fs_map_blocks() calls until we are
	 * done with this page.
	 */
	map->m_lblk = block_in_file;
	map->m_len = last_block - block_in_file;

	ret = f2fs_map_blocks(inode, map, 0, F2FS_GET_BLOCK_DEFAULT);
	if (ret)
		goto out;
got_it:
	if ((map->m_flags & F2FS_MAP_MAPPED)) {
		block_nr = map->m_pblk + block_in_file - map->m_lblk;
		SetPageMappedToDisk(page);

		if (!PageUptodate(page) && (!PageSwapCache(page) &&
					!cleancache_get_page(page))) {
			SetPageUptodate(page);
			goto confused;
		}

		if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
						DATA_GENERIC_ENHANCE_READ)) {
			ret = -EFSCORRUPTED;
			goto out;
		}
	} else {
zero_out:
		zero_user_segment(page, 0, PAGE_SIZE);
		if (!PageUptodate(page))
			SetPageUptodate(page);
		unlock_page(page);
		goto out;
	}

	/*
	 * This page will go to BIO.  Do we need to send this
	 * BIO off first?
	 */
	if (bio && !page_is_mergeable(F2FS_I_SB(inode), bio,
				*last_block_in_bio, block_nr)) {
submit_and_realloc:
		__submit_bio(F2FS_I_SB(inode), bio, DATA);
		bio = NULL;
	}
	if (bio == NULL) {
		bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
				is_readahead ? REQ_RAHEAD : 0);
		if (IS_ERR(bio)) {
			ret = PTR_ERR(bio);
			bio = NULL;
			goto out;
		}
	}

	/*
	 * If the page is under writeback, we need to wait for
	 * its completion to see the correct decrypted data.
	 */
	f2fs_wait_on_block_writeback(inode, block_nr);

	if (bio_add_page(bio, page, blocksize, 0) < blocksize)
		goto submit_and_realloc;

	inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
	ClearPageError(page);
	*last_block_in_bio = block_nr;
	goto out;
confused:
	if (bio) {
		__submit_bio(F2FS_I_SB(inode), bio, DATA);
		bio = NULL;
	}
	unlock_page(page);
out:
	*bio_ret = bio;
	return ret;
}

/*
 * This function was originally taken from fs/mpage.c, and customized for f2fs.
 * Major change was from block_size == page_size in f2fs by default.
 *
 * Note that the aops->readpages() function is ONLY used for read-ahead. If
 * this function ever deviates from doing just read-ahead, it should either
 * use ->readpage() or do the necessary surgery to decouple ->readpages()
 * from read-ahead.
 */
static int f2fs_mpage_readpages(struct address_space *mapping,
			struct list_head *pages, struct page *page,
			unsigned nr_pages, bool is_readahead)
{
	struct bio *bio = NULL;
	sector_t last_block_in_bio = 0;
	struct inode *inode = mapping->host;
	struct f2fs_map_blocks map;
	int ret = 0;

	map.m_pblk = 0;
	map.m_lblk = 0;
	map.m_len = 0;
	map.m_flags = 0;
	map.m_next_pgofs = NULL;
	map.m_next_extent = NULL;
	map.m_seg_type = NO_CHECK_TYPE;
	map.m_may_create = false;

	for (; nr_pages; nr_pages--) {
		if (pages) {
			page = list_last_entry(pages, struct page, lru);

			prefetchw(&page->flags);
			list_del(&page->lru);
			if (add_to_page_cache_lru(page, mapping,
						  page_index(page),
						  readahead_gfp_mask(mapping)))
				goto next_page;
		}

		ret = f2fs_read_single_page(inode, page, nr_pages, &map, &bio,
					&last_block_in_bio, is_readahead);
		if (ret) {
			SetPageError(page);
			zero_user_segment(page, 0, PAGE_SIZE);
			unlock_page(page);
		}
next_page:
		if (pages)
			put_page(page);
	}
	BUG_ON(pages && !list_empty(pages));
	if (bio)
		__submit_bio(F2FS_I_SB(inode), bio, DATA);
	return pages ? 0 : ret;
}

static int f2fs_read_data_page(struct file *file, struct page *page)
{
	struct inode *inode = page_file_mapping(page)->host;
	int ret = -EAGAIN;

	trace_f2fs_readpage(page, DATA);

	/* If the file has inline data, try to read it directly */
	if (f2fs_has_inline_data(inode))
		ret = f2fs_read_inline_data(inode, page);
	if (ret == -EAGAIN)
		ret = f2fs_mpage_readpages(page_file_mapping(page),
						NULL, page, 1, false);
	return ret;
}

static int f2fs_read_data_pages(struct file *file,
			struct address_space *mapping,
			struct list_head *pages, unsigned nr_pages)
{
	struct inode *inode = mapping->host;
	struct page *page = list_last_entry(pages, struct page, lru);

	trace_f2fs_readpages(inode, page, nr_pages);

	/* If the file has inline data, skip readpages */
	if (f2fs_has_inline_data(inode))
		return 0;

	return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages, true);
}

static int encrypt_one_page(struct f2fs_io_info *fio)
{
	struct inode *inode = fio->page->mapping->host;
	struct page *mpage;
	gfp_t gfp_flags = GFP_NOFS;

	if (!f2fs_encrypted_file(inode))
		return 0;

	/* wait for GCed page writeback via META_MAPPING */
	f2fs_wait_on_block_writeback(inode, fio->old_blkaddr);

retry_encrypt:
	fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(fio->page,
							       PAGE_SIZE, 0,
							       gfp_flags);
	if (IS_ERR(fio->encrypted_page)) {
		/* flush pending IOs and wait for a while in the ENOMEM case */
		if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
			f2fs_flush_merged_writes(fio->sbi);
			congestion_wait(BLK_RW_ASYNC, HZ/50);
			gfp_flags |= __GFP_NOFAIL;
			goto retry_encrypt;
		}
		return PTR_ERR(fio->encrypted_page);
	}

	mpage = find_lock_page(META_MAPPING(fio->sbi), fio->old_blkaddr);
	if (mpage) {
		if (PageUptodate(mpage))
			memcpy(page_address(mpage),
				page_address(fio->encrypted_page), PAGE_SIZE);
		f2fs_put_page(mpage, 1);
	}
	return 0;
}

static inline bool check_inplace_update_policy(struct inode *inode,
				struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	unsigned int policy = SM_I(sbi)->ipu_policy;

	if (policy & (0x1 << F2FS_IPU_FORCE))
		return true;
	if (policy & (0x1 << F2FS_IPU_SSR) && f2fs_need_SSR(sbi))
		return true;
	if (policy & (0x1 << F2FS_IPU_UTIL) &&
			utilization(sbi) > SM_I(sbi)->min_ipu_util)
		return true;
	if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && f2fs_need_SSR(sbi) &&
			utilization(sbi) > SM_I(sbi)->min_ipu_util)
		return true;

	/*
	 * IPU for rewriting async pages
	 */
	if (policy & (0x1 << F2FS_IPU_ASYNC) &&
			fio && fio->op == REQ_OP_WRITE &&
			!(fio->op_flags & REQ_SYNC) &&
			!IS_ENCRYPTED(inode))
		return true;

	/* FI_NEED_IPU is only set during fdatasync */
	if (policy & (0x1 << F2FS_IPU_FSYNC) &&
			is_inode_flag_set(inode, FI_NEED_IPU))
		return true;

	if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
			!f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
		return true;

	return false;
}

bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio)
{
	if (f2fs_is_pinned_file(inode))
		return true;

	/* if this is a cold file, we should overwrite to avoid fragmentation */
	if (file_is_cold(inode))
		return true;

	return check_inplace_update_policy(inode, fio);
}

bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	if (test_opt(sbi, LFS))
		return true;
	if (S_ISDIR(inode->i_mode))
		return true;
	if (IS_NOQUOTA(inode))
		return true;
	if (f2fs_is_atomic_file(inode))
		return true;
	if (fio) {
		if (is_cold_data(fio->page))
			return true;
		if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
			return true;
		if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
			f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
			return true;
	}
	return false;
}

static inline bool need_inplace_update(struct f2fs_io_info *fio)
{
	struct inode *inode = fio->page->mapping->host;

	if (f2fs_should_update_outplace(inode, fio))
		return false;

	return f2fs_should_update_inplace(inode, fio);
}

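/*
 * Write one dirty data page either in place (IPU) or to a newly
 * allocated block (OPU). A cached extent lookup can supply the old
 * block address up front, in which case IPU proceeds without taking
 * f2fs_lock_op(); otherwise the dnode is consulted and fio->need_lock
 * dictates how the op lock is acquired and released.
 */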
int f2fs_do_write_data_page(struct f2fs_io_info *fio)
{
	struct page *page = fio->page;
	struct inode *inode = page->mapping->host;
	struct dnode_of_data dn;
	struct extent_info ei = {0,0,0};
	struct node_info ni;
	bool ipu_force = false;
	int err = 0;

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	if (need_inplace_update(fio) &&
			f2fs_lookup_extent_cache(inode, page->index, &ei)) {
		fio->old_blkaddr = ei.blk + page->index - ei.fofs;

		if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
						DATA_GENERIC_ENHANCE))
			return -EFSCORRUPTED;

		ipu_force = true;
		fio->need_lock = LOCK_DONE;
		goto got_it;
	}

	/* to avoid deadlock between page->lock and f2fs_lock_op */
	if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi))
		return -EAGAIN;

	err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
	if (err)
		goto out;

	fio->old_blkaddr = dn.data_blkaddr;

	/* This page is already truncated */
	if (fio->old_blkaddr == NULL_ADDR) {
		ClearPageUptodate(page);
		clear_cold_data(page);
		goto out_writepage;
	}
got_it:
	if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
		!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
						DATA_GENERIC_ENHANCE)) {
		err = -EFSCORRUPTED;
		goto out_writepage;
	}
	/*
	 * If the current allocation needs SSR, updated data had better
	 * be written in place.
	 */
	if (ipu_force ||
		(__is_valid_data_blkaddr(fio->old_blkaddr) &&
					need_inplace_update(fio))) {
		err = encrypt_one_page(fio);
		if (err)
			goto out_writepage;

		set_page_writeback(page);
		ClearPageError(page);
		f2fs_put_dnode(&dn);
		if (fio->need_lock == LOCK_REQ)
			f2fs_unlock_op(fio->sbi);
		err = f2fs_inplace_write_data(fio);
		if (err) {
			if (f2fs_encrypted_file(inode))
				fscrypt_finalize_bounce_page(&fio->encrypted_page);
			if (PageWriteback(page))
				end_page_writeback(page);
		} else {
			set_inode_flag(inode, FI_UPDATE_WRITE);
		}
		trace_f2fs_do_write_data_page(fio->page, IPU);
		return err;
	}

	if (fio->need_lock == LOCK_RETRY) {
		if (!f2fs_trylock_op(fio->sbi)) {
			err = -EAGAIN;
			goto out_writepage;
		}
		fio->need_lock = LOCK_REQ;
	}

	err = f2fs_get_node_info(fio->sbi, dn.nid, &ni);
	if (err)
		goto out_writepage;

	fio->version = ni.version;

	err = encrypt_one_page(fio);
	if (err)
		goto out_writepage;

	set_page_writeback(page);
	ClearPageError(page);

	/* LFS mode write path */
	f2fs_outplace_write_data(&dn, fio);
	trace_f2fs_do_write_data_page(page, OPU);
	set_inode_flag(inode, FI_APPEND_WRITE);
	if (page->index == 0)
		set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
out_writepage:
	f2fs_put_dnode(&dn);
out:
	if (fio->need_lock == LOCK_REQ)
		f2fs_unlock_op(fio->sbi);
	return err;
}

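/*
 * Write back a single page. On success the page is unlocked and zero
 * is returned; otherwise the page is redirtied and the function
 * returns AOP_WRITEPAGE_ACTIVATE (page left locked) or a negative
 * error. *submitted reports whether a bio was issued for this page.
 */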
static int __write_data_page(struct page *page, bool *submitted,
				struct bio **bio,
				sector_t *last_block,
				struct writeback_control *wbc,
				enum iostat_type io_type)
{
	struct inode *inode = page->mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	loff_t i_size = i_size_read(inode);
	const pgoff_t end_index = ((unsigned long long) i_size)
							>> PAGE_SHIFT;
	loff_t psize = (page->index + 1) << PAGE_SHIFT;
	unsigned offset = 0;
	bool need_balance_fs = false;
	int err = 0;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.ino = inode->i_ino,
		.type = DATA,
		.op = REQ_OP_WRITE,
		.op_flags = wbc_to_write_flags(wbc),
		.old_blkaddr = NULL_ADDR,
		.page = page,
		.encrypted_page = NULL,
		.submitted = false,
		.need_lock = LOCK_RETRY,
		.io_type = io_type,
		.io_wbc = wbc,
		.bio = bio,
		.last_block = last_block,
	};

	trace_f2fs_writepage(page, DATA);

	/* we should bypass data pages to let the kworker jobs proceed */
	if (unlikely(f2fs_cp_error(sbi))) {
		mapping_set_error(page->mapping, -EIO);
		/*
		 * don't drop any dirty dentry pages for keeping the latest
		 * directory structure.
		 */
		if (S_ISDIR(inode->i_mode))
			goto redirty_out;
		goto out;
	}

	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto redirty_out;

	if (page->index < end_index)
		goto write;

	/*
	 * If the offset is out-of-range of file size,
	 * this page does not have to be written to disk.
	 */
	offset = i_size & (PAGE_SIZE - 1);
	if ((page->index >= end_index + 1) || !offset)
		goto out;

	zero_user_segment(page, offset, PAGE_SIZE);
write:
	if (f2fs_is_drop_cache(inode))
		goto out;
	/* we should not write the 0'th page having the journal header */
	if (f2fs_is_volatile_file(inode) && (!page->index ||
			(!wbc->for_reclaim &&
			f2fs_available_free_memory(sbi, BASE_CHECK))))
		goto redirty_out;

	/* Dentry blocks are controlled by checkpoint */
	if (S_ISDIR(inode->i_mode)) {
		fio.need_lock = LOCK_DONE;
		err = f2fs_do_write_data_page(&fio);
		goto done;
	}

	if (!wbc->for_reclaim)
		need_balance_fs = true;
	else if (has_not_enough_free_secs(sbi, 0, 0))
		goto redirty_out;
	else
		set_inode_flag(inode, FI_HOT_DATA);

	err = -EAGAIN;
	if (f2fs_has_inline_data(inode)) {
		err = f2fs_write_inline_data(inode, page);
		if (!err)
			goto out;
	}

	if (err == -EAGAIN) {
		err = f2fs_do_write_data_page(&fio);
		if (err == -EAGAIN) {
			fio.need_lock = LOCK_REQ;
			err = f2fs_do_write_data_page(&fio);
		}
	}

	if (err) {
		file_set_keep_isize(inode);
	} else {
		down_write(&F2FS_I(inode)->i_sem);
		if (F2FS_I(inode)->last_disk_size < psize)
			F2FS_I(inode)->last_disk_size = psize;
		up_write(&F2FS_I(inode)->i_sem);
	}

done:
	if (err && err != -ENOENT)
		goto redirty_out;

out:
	inode_dec_dirty_pages(inode);
	if (err) {
		ClearPageUptodate(page);
		clear_cold_data(page);
	}

	if (wbc->for_reclaim) {
		f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA);
		clear_inode_flag(inode, FI_HOT_DATA);
		f2fs_remove_dirty_inode(inode);
		submitted = NULL;
	}

	unlock_page(page);
	if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) &&
					!F2FS_I(inode)->cp_task) {
		f2fs_submit_ipu_bio(sbi, bio, page);
		f2fs_balance_fs(sbi, need_balance_fs);
	}

	if (unlikely(f2fs_cp_error(sbi))) {
		f2fs_submit_ipu_bio(sbi, bio, page);
		f2fs_submit_merged_write(sbi, DATA);
		submitted = NULL;
	}

	if (submitted)
		*submitted = fio.submitted;

	return 0;

redirty_out:
	redirty_page_for_writepage(wbc, page);
	/*
	 * pageout() in MM translates EAGAIN, so calls handle_write_error()
	 * -> mapping_set_error() -> set_bit(AS_EIO, ...).
	 * file_write_and_wait_range() will see EIO error, which is critical
	 * to the return value of fsync() followed by atomic_write failure
	 * to user.
	 */
	if (!err || wbc->for_reclaim)
		return AOP_WRITEPAGE_ACTIVATE;
	unlock_page(page);
	return err;
}

static int f2fs_write_data_page(struct page *page,
					struct writeback_control *wbc)
{
	return __write_data_page(page, NULL, NULL, NULL, wbc, FS_DATA_IO);
}

/*
 * This function was copied from write_cache_pages from mm/page-writeback.c.
 * The major change is making the write step of cold data pages separate from
 * warm/hot data pages.
 */
static int f2fs_write_cache_pages(struct address_space *mapping,
					struct writeback_control *wbc,
					enum iostat_type io_type)
{
	int ret = 0;
	int done = 0;
	struct pagevec pvec;
	struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
	struct bio *bio = NULL;
	sector_t last_block;
	int nr_pages;
	pgoff_t uninitialized_var(writeback_index);
	pgoff_t index;
	pgoff_t end;		/* Inclusive */
	pgoff_t done_index;
	int cycled;
	int range_whole = 0;
	xa_mark_t tag;
	int nwritten = 0;

	pagevec_init(&pvec);

	if (get_dirty_pages(mapping->host) <=
				SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
		set_inode_flag(mapping->host, FI_HOT_DATA);
	else
		clear_inode_flag(mapping->host, FI_HOT_DATA);

	if (wbc->range_cyclic) {
		writeback_index = mapping->writeback_index; /* prev offset */
		index = writeback_index;
		if (index == 0)
			cycled = 1;
		else
			cycled = 0;
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_SHIFT;
		end = wbc->range_end >> PAGE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
		cycled = 1; /* ignore range_cyclic tests */
	}
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag = PAGECACHE_TAG_TOWRITE;
	else
		tag = PAGECACHE_TAG_DIRTY;
retry:
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag_pages_for_writeback(mapping, index, end);
	done_index = index;
	while (!done && (index <= end)) {
		int i;

		nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
				tag);
		if (nr_pages == 0)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];
			bool submitted = false;

			/* give priority to WB_SYNC threads */
			if (atomic_read(&sbi->wb_sync_req[DATA]) &&
					wbc->sync_mode == WB_SYNC_NONE) {
				done = 1;
				break;
			}

			done_index = page->index;
retry_write:
			lock_page(page);

			if (unlikely(page->mapping != mapping)) {
continue_unlock:
				unlock_page(page);
				continue;
			}

			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			if (PageWriteback(page)) {
				if (wbc->sync_mode != WB_SYNC_NONE) {
					f2fs_wait_on_page_writeback(page,
							DATA, true, true);
					f2fs_submit_ipu_bio(sbi, &bio, page);
				} else {
					goto continue_unlock;
				}
			}

			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			ret = __write_data_page(page, &submitted, &bio,
					&last_block, wbc, io_type);
			if (unlikely(ret)) {
				/*
				 * keep nr_to_write, since vfs uses this to
				 * get # of written pages.
				 */
				if (ret == AOP_WRITEPAGE_ACTIVATE) {
					unlock_page(page);
					ret = 0;
					continue;
				} else if (ret == -EAGAIN) {
					ret = 0;
					if (wbc->sync_mode == WB_SYNC_ALL) {
						cond_resched();
						congestion_wait(BLK_RW_ASYNC,
									HZ/50);
						goto retry_write;
					}
					continue;
				}
				done_index = page->index + 1;
				done = 1;
				break;
			} else if (submitted) {
				nwritten++;
			}

			if (--wbc->nr_to_write <= 0 &&
					wbc->sync_mode == WB_SYNC_NONE) {
				done = 1;
				break;
			}
		}
		pagevec_release(&pvec);
		cond_resched();
	}

	if (!cycled && !done) {
		cycled = 1;
		index = 0;
		end = writeback_index - 1;
		goto retry;
	}
	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = done_index;

	if (nwritten)
		f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host,
								NULL, 0, DATA);
	/* submit cached bio of IPU write */
	if (bio)
		__submit_bio(sbi, bio, DATA);

	return ret;
}

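/*
 * Serializing writepages under sbi->writepages keeps big flows of
 * dirty data from different inodes from interleaving on disk; the
 * checkpoint task is excluded to avoid deadlock in the data-flush
 * path.
 */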
static inline bool __should_serialize_io(struct inode *inode,
					struct writeback_control *wbc)
{
	if (!S_ISREG(inode->i_mode))
		return false;
	if (IS_NOQUOTA(inode))
		return false;
	/* to avoid deadlock in path of data flush */
	if (F2FS_I(inode)->cp_task)
		return false;
	if (wbc->sync_mode != WB_SYNC_ALL)
		return true;
	if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
		return true;
	return false;
}

static int __f2fs_write_data_pages(struct address_space *mapping,
						struct writeback_control *wbc,
						enum iostat_type io_type)
{
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct blk_plug plug;
	int ret;
	bool locked = false;

	/* deal with chardevs and other special files */
	if (!mapping->a_ops->writepage)
		return 0;

	/* skip writing if there is no dirty page in this inode */
	if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE)
		return 0;

	/* during POR, we don't need to trigger writepage at all. */
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto skip_write;

	if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) &&
			wbc->sync_mode == WB_SYNC_NONE &&
			get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
			f2fs_available_free_memory(sbi, DIRTY_DENTS))
		goto skip_write;

	/* skip writing during file defragment */
	if (is_inode_flag_set(inode, FI_DO_DEFRAG))
		goto skip_write;

	trace_f2fs_writepages(mapping->host, wbc, DATA);

	/* to avoid splitting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
	if (wbc->sync_mode == WB_SYNC_ALL)
		atomic_inc(&sbi->wb_sync_req[DATA]);
	else if (atomic_read(&sbi->wb_sync_req[DATA]))
		goto skip_write;

	if (__should_serialize_io(inode, wbc)) {
		mutex_lock(&sbi->writepages);
		locked = true;
	}

	blk_start_plug(&plug);
	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
	blk_finish_plug(&plug);

	if (locked)
		mutex_unlock(&sbi->writepages);

	if (wbc->sync_mode == WB_SYNC_ALL)
		atomic_dec(&sbi->wb_sync_req[DATA]);
	/*
	 * if some pages were truncated, we cannot guarantee that
	 * mapping->host can detect pending bios.
	 */

	f2fs_remove_dirty_inode(inode);
	return ret;

skip_write:
	wbc->pages_skipped += get_dirty_pages(inode);
	trace_f2fs_writepages(mapping->host, wbc, DATA);
	return 0;
}

static int f2fs_write_data_pages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;

	return __f2fs_write_data_pages(mapping, wbc,
			F2FS_I(inode)->cp_task == current ?
			FS_CP_DATA_IO : FS_DATA_IO);
}

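/*
 * A write that was supposed to extend the file failed: drop the
 * pagecache and any blocks that were preallocated beyond the old
 * i_size.
 */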
static void f2fs_write_failed(struct address_space *mapping, loff_t to)
{
	struct inode *inode = mapping->host;
	loff_t i_size = i_size_read(inode);

	if (to > i_size) {
		down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
		down_write(&F2FS_I(inode)->i_mmap_sem);

		truncate_pagecache(inode, i_size);
		if (!IS_NOQUOTA(inode))
			f2fs_truncate_blocks(inode, i_size, true);

		up_write(&F2FS_I(inode)->i_mmap_sem);
		up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	}
}

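/*
 * Resolve the block backing the page being written: *blk_addr gets
 * its current address (possibly just allocated) and *node_changed
 * reports whether inline conversion dirtied node pages. The map lock
 * is only taken for inline data or writes beyond i_size.
 */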
static int prepare_write_begin(struct f2fs_sb_info *sbi,
			struct page *page, loff_t pos, unsigned len,
			block_t *blk_addr, bool *node_changed)
{
	struct inode *inode = page->mapping->host;
	pgoff_t index = page->index;
	struct dnode_of_data dn;
	struct page *ipage;
	bool locked = false;
	struct extent_info ei = {0,0,0};
	int err = 0;
	int flag;

	/*
	 * we already allocated all the blocks, so we don't need to get
	 * the block addresses when there is no need to fill the page.
	 */
	if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE &&
			!is_inode_flag_set(inode, FI_NO_PREALLOC))
		return 0;

	/* f2fs_lock_op avoids race between write CP and convert_inline_page */
	if (f2fs_has_inline_data(inode) && pos + len > MAX_INLINE_DATA(inode))
		flag = F2FS_GET_BLOCK_DEFAULT;
	else
		flag = F2FS_GET_BLOCK_PRE_AIO;

	if (f2fs_has_inline_data(inode) ||
			(pos & PAGE_MASK) >= i_size_read(inode)) {
		__do_map_lock(sbi, flag, true);
		locked = true;
	}
restart:
	/* check inline_data */
	ipage = f2fs_get_node_page(sbi, inode->i_ino);
	if (IS_ERR(ipage)) {
		err = PTR_ERR(ipage);
		goto unlock_out;
	}

	set_new_dnode(&dn, inode, ipage, ipage, 0);

	if (f2fs_has_inline_data(inode)) {
		if (pos + len <= MAX_INLINE_DATA(inode)) {
			f2fs_do_read_inline_data(page, ipage);
			set_inode_flag(inode, FI_DATA_EXIST);
			if (inode->i_nlink)
				set_inline_node(ipage);
		} else {
			err = f2fs_convert_inline_page(&dn, page);
			if (err)
				goto out;
			if (dn.data_blkaddr == NULL_ADDR)
				err = f2fs_get_block(&dn, index);
		}
	} else if (locked) {
		err = f2fs_get_block(&dn, index);
	} else {
		if (f2fs_lookup_extent_cache(inode, index, &ei)) {
			dn.data_blkaddr = ei.blk + index - ei.fofs;
		} else {
			/* hole case */
			err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
			if (err || dn.data_blkaddr == NULL_ADDR) {
				f2fs_put_dnode(&dn);
				__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
								true);
				WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO);
				locked = true;
				goto restart;
			}
		}
	}

	/* convert_inline_page can make node_changed */
	*blk_addr = dn.data_blkaddr;
	*node_changed = dn.node_changed;
out:
	f2fs_put_dnode(&dn);
unlock_out:
	if (locked)
		__do_map_lock(sbi, flag, false);
	return err;
}

static int f2fs_write_begin(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned flags,
		struct page **pagep, void **fsdata)
{
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct page *page = NULL;
	pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT;
	bool need_balance = false, drop_atomic = false;
	block_t blkaddr = NULL_ADDR;
	int err = 0;

	trace_f2fs_write_begin(inode, pos, len, flags);

	err = f2fs_is_checkpoint_ready(sbi);
	if (err)
		goto fail;

	if ((f2fs_is_atomic_file(inode) &&
			!f2fs_available_free_memory(sbi, INMEM_PAGES)) ||
			is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) {
		err = -ENOMEM;
		drop_atomic = true;
		goto fail;
	}

	/*
	 * We should check this at this moment to avoid deadlock on inode page
	 * and #0 page. The locking rule for inline_data conversion should be:
	 * lock_page(page #0) -> lock_page(inode_page)
	 */
	if (index != 0) {
		err = f2fs_convert_inline_inode(inode);
		if (err)
			goto fail;
	}
repeat:
	/*
	 * Do not use grab_cache_page_write_begin() to avoid deadlock due to
	 * wait_for_stable_page. We will wait for that below under our own
	 * IO control.
	 */
	page = f2fs_pagecache_get_page(mapping, index,
				FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS);
	if (!page) {
		err = -ENOMEM;
		goto fail;
	}

	*pagep = page;

	err = prepare_write_begin(sbi, page, pos, len,
					&blkaddr, &need_balance);
	if (err)
		goto fail;

	if (need_balance && !IS_NOQUOTA(inode) &&
			has_not_enough_free_secs(sbi, 0, 0)) {
		unlock_page(page);
		f2fs_balance_fs(sbi, true);
		lock_page(page);
		if (page->mapping != mapping) {
			/* The page got truncated from under us */
			f2fs_put_page(page, 1);
			goto repeat;
		}
	}

	f2fs_wait_on_page_writeback(page, DATA, false, true);

	if (len == PAGE_SIZE || PageUptodate(page))
		return 0;

	if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode)) {
		zero_user_segment(page, len, PAGE_SIZE);
		return 0;
	}

	if (blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_SIZE);
		SetPageUptodate(page);
	} else {
		if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
				DATA_GENERIC_ENHANCE_READ)) {
			err = -EFSCORRUPTED;
			goto fail;
		}
		err = f2fs_submit_page_read(inode, page, blkaddr);
		if (err)
			goto fail;

		lock_page(page);
		if (unlikely(page->mapping != mapping)) {
			f2fs_put_page(page, 1);
			goto repeat;
		}
		if (unlikely(!PageUptodate(page))) {
			err = -EIO;
			goto fail;
		}
	}
	return 0;

fail:
	f2fs_put_page(page, 1);
	f2fs_write_failed(mapping, pos + len);
	if (drop_atomic)
		f2fs_drop_inmem_pages_all(sbi, false);
	return err;
}

static int f2fs_write_end(struct file *file,
			struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	struct inode *inode = page->mapping->host;

	trace_f2fs_write_end(inode, pos, len, copied);

	/*
	 * This should come from len == PAGE_SIZE, and we expect copied
	 * should be PAGE_SIZE. Otherwise, we treat it with zero copied and
	 * let generic_perform_write() try to copy data again through copied=0.
	 */
	if (!PageUptodate(page)) {
		if (unlikely(copied != len))
			copied = 0;
		else
			SetPageUptodate(page);
	}
	if (!copied)
		goto unlock_out;

	set_page_dirty(page);

	if (pos + copied > i_size_read(inode))
		f2fs_i_size_write(inode, pos + copied);
unlock_out:
	f2fs_put_page(page, 1);
	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
	return copied;
}

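/*
 * Direct I/O must be block-aligned. With 4KiB blocks (blkbits == 12),
 * a request that is only 512B-aligned falls through to the logical-
 * sector check and returns 1, in which case the caller falls back to
 * buffered I/O; a request that is not even sector-aligned gets
 * -EINVAL.
 */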
static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
			   loff_t offset)
{
	unsigned i_blkbits = READ_ONCE(inode->i_blkbits);
	unsigned blkbits = i_blkbits;
	unsigned blocksize_mask = (1 << blkbits) - 1;
	unsigned long align = offset | iov_iter_alignment(iter);
	struct block_device *bdev = inode->i_sb->s_bdev;

	if (align & blocksize_mask) {
		if (bdev)
			blkbits = blksize_bits(bdev_logical_block_size(bdev));
		blocksize_mask = (1 << blkbits) - 1;
		if (align & blocksize_mask)
			return -EINVAL;
		return 1;
	}
	return 0;
}

static void f2fs_dio_end_io(struct bio *bio)
{
	struct f2fs_private_dio *dio = bio->bi_private;

	dec_page_count(F2FS_I_SB(dio->inode),
			dio->write ? F2FS_DIO_WRITE : F2FS_DIO_READ);

	bio->bi_private = dio->orig_private;
	bio->bi_end_io = dio->orig_end_io;

	kvfree(dio);

	bio_endio(bio);
}

static void f2fs_dio_submit_bio(struct bio *bio, struct inode *inode,
							loff_t file_offset)
{
	struct f2fs_private_dio *dio;
	bool write = (bio_op(bio) == REQ_OP_WRITE);

	dio = f2fs_kzalloc(F2FS_I_SB(inode),
			sizeof(struct f2fs_private_dio), GFP_NOFS);
	if (!dio)
		goto out;

	dio->inode = inode;
	dio->orig_end_io = bio->bi_end_io;
	dio->orig_private = bio->bi_private;
	dio->write = write;

	bio->bi_end_io = f2fs_dio_end_io;
	bio->bi_private = dio;

	inc_page_count(F2FS_I_SB(inode),
			write ? F2FS_DIO_WRITE : F2FS_DIO_READ);

	submit_bio(bio);
	return;
out:
	bio->bi_status = BLK_STS_IOERR;
	bio_endio(bio);
}

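/*
 * i_gc_rwsem is held in read mode across the blockdev DIO (both the
 * write and read sems for out-of-place updates) so GC cannot migrate
 * the blocks mid-flight; IOCB_NOWAIT demotes those locks to trylocks
 * that fail with -EAGAIN.
 */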
static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
	struct address_space *mapping = iocb->ki_filp->f_mapping;
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	size_t count = iov_iter_count(iter);
	loff_t offset = iocb->ki_pos;
	int rw = iov_iter_rw(iter);
	int err;
	enum rw_hint hint = iocb->ki_hint;
	int whint_mode = F2FS_OPTION(sbi).whint_mode;
	bool do_opu;

	err = check_direct_IO(inode, iter, offset);
	if (err)
		return err < 0 ? err : 0;

	if (f2fs_force_buffered_io(inode, iocb, iter))
		return 0;

	do_opu = allow_outplace_dio(inode, iocb, iter);

	trace_f2fs_direct_IO_enter(inode, offset, count, rw);

	if (rw == WRITE && whint_mode == WHINT_MODE_OFF)
		iocb->ki_hint = WRITE_LIFE_NOT_SET;

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!down_read_trylock(&fi->i_gc_rwsem[rw])) {
			iocb->ki_hint = hint;
			err = -EAGAIN;
			goto out;
		}
		if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
			up_read(&fi->i_gc_rwsem[rw]);
			iocb->ki_hint = hint;
			err = -EAGAIN;
			goto out;
		}
	} else {
		down_read(&fi->i_gc_rwsem[rw]);
		if (do_opu)
			down_read(&fi->i_gc_rwsem[READ]);
	}

	err = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
			iter, rw == WRITE ? get_data_block_dio_write :
			get_data_block_dio, NULL, f2fs_dio_submit_bio,
			DIO_LOCKING | DIO_SKIP_HOLES);

	if (do_opu)
		up_read(&fi->i_gc_rwsem[READ]);

	up_read(&fi->i_gc_rwsem[rw]);

	if (rw == WRITE) {
		if (whint_mode == WHINT_MODE_OFF)
			iocb->ki_hint = hint;
		if (err > 0) {
			f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
									err);
			if (!do_opu)
				set_inode_flag(inode, FI_UPDATE_WRITE);
		} else if (err < 0) {
			f2fs_write_failed(mapping, offset + count);
		}
	}

out:
	trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);

	return err;
}

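/*
 * Only whole-page invalidation is handled for inodes above the
 * meta/node inodes; it drops the dirty-page accounting and any
 * atomic-write registration before clearing the page's private state.
 */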
void f2fs_invalidate_page(struct page *page, unsigned int offset,
							unsigned int length)
{
	struct inode *inode = page->mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
		(offset % PAGE_SIZE || length != PAGE_SIZE))
		return;

	if (PageDirty(page)) {
		if (inode->i_ino == F2FS_META_INO(sbi)) {
			dec_page_count(sbi, F2FS_DIRTY_META);
		} else if (inode->i_ino == F2FS_NODE_INO(sbi)) {
			dec_page_count(sbi, F2FS_DIRTY_NODES);
		} else {
			inode_dec_dirty_pages(inode);
			f2fs_remove_dirty_inode(inode);
		}
	}

	clear_cold_data(page);

	if (IS_ATOMIC_WRITTEN_PAGE(page))
		return f2fs_drop_inmem_page(inode, page);

	f2fs_clear_page_private(page);
}

int f2fs_release_page(struct page *page, gfp_t wait)
{
	/* If this is a dirty page, keep PagePrivate */
	if (PageDirty(page))
		return 0;

	/* This is an atomic-written page, keep Private */
	if (IS_ATOMIC_WRITTEN_PAGE(page))
		return 0;

	clear_cold_data(page);
	f2fs_clear_page_private(page);
	return 1;
}

static int f2fs_set_data_page_dirty(struct page *page)
{
	struct inode *inode = page_file_mapping(page)->host;

	trace_f2fs_set_page_dirty(page, DATA);

	if (!PageUptodate(page))
		SetPageUptodate(page);
	if (PageSwapCache(page))
		return __set_page_dirty_nobuffers(page);

	if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) {
		if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
			f2fs_register_inmem_page(inode, page);
			return 1;
		}
		/*
		 * This page has already been registered, so just
		 * return here.
		 */
		return 0;
	}

	if (!PageDirty(page)) {
		__set_page_dirty_nobuffers(page);
		f2fs_update_dirty_page(inode, page);
		return 1;
	}
	return 0;
}

static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
{
	struct inode *inode = mapping->host;

	if (f2fs_has_inline_data(inode))
		return 0;

	/* make sure the whole blocks are allocated */
	if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
		filemap_write_and_wait(mapping);

	return generic_block_bmap(mapping, block, get_data_block_bmap);
}

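/*
 * Page migration: an atomic-written page carries one extra reference
 * and an entry in fi->inmem_pages, both of which must be transferred
 * to the new page under inmem_lock.
 */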
#ifdef CONFIG_MIGRATION
#include <linux/migrate.h>

int f2fs_migrate_page(struct address_space *mapping,
		struct page *newpage, struct page *page, enum migrate_mode mode)
{
	int rc, extra_count;
	struct f2fs_inode_info *fi = F2FS_I(mapping->host);
	bool atomic_written = IS_ATOMIC_WRITTEN_PAGE(page);

	BUG_ON(PageWriteback(page));

	/* migrating an atomic written page is safe with the inmem_lock held */
	if (atomic_written) {
		if (mode != MIGRATE_SYNC)
			return -EBUSY;
		if (!mutex_trylock(&fi->inmem_lock))
			return -EAGAIN;
	}

	/* one extra reference was held for the atomic_write page */
	extra_count = atomic_written ? 1 : 0;
	rc = migrate_page_move_mapping(mapping, newpage,
				page, extra_count);
	if (rc != MIGRATEPAGE_SUCCESS) {
		if (atomic_written)
			mutex_unlock(&fi->inmem_lock);
		return rc;
	}

	if (atomic_written) {
		struct inmem_pages *cur;
		list_for_each_entry(cur, &fi->inmem_pages, list)
			if (cur->page == page) {
				cur->page = newpage;
				break;
			}
		mutex_unlock(&fi->inmem_lock);
		put_page(page);
		get_page(newpage);
	}

	if (PagePrivate(page)) {
		f2fs_set_page_private(newpage, page_private(page));
		f2fs_clear_page_private(page);
	}

	if (mode != MIGRATE_SYNC_NO_COPY)
		migrate_page_copy(newpage, page);
	else
		migrate_page_states(newpage, page);

	return MIGRATEPAGE_SUCCESS;
}
#endif

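/*
 * Swapfile support: every block of the file must be allocated and
 * PAGE_SIZE-aligned on disk, so inline data is converted and the
 * whole file is probed through bmap() before it is pinned for swap.
 */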
#ifdef CONFIG_SWAP
/* Copied from generic_swapfile_activate() to check any holes */
static int check_swap_activate(struct file *swap_file, unsigned int max)
{
	struct address_space *mapping = swap_file->f_mapping;
	struct inode *inode = mapping->host;
	unsigned blocks_per_page;
	unsigned long page_no;
	unsigned blkbits;
	sector_t probe_block;
	sector_t last_block;
	sector_t lowest_block = -1;
	sector_t highest_block = 0;

	blkbits = inode->i_blkbits;
	blocks_per_page = PAGE_SIZE >> blkbits;

	/*
	 * Map all the blocks into the extent list.  This code doesn't try
	 * to be very smart.
	 */
	probe_block = 0;
	page_no = 0;
	last_block = i_size_read(inode) >> blkbits;
	while ((probe_block + blocks_per_page) <= last_block && page_no < max) {
		unsigned block_in_page;
		sector_t first_block;

		cond_resched();

		first_block = bmap(inode, probe_block);
		if (first_block == 0)
			goto bad_bmap;

		/*
		 * It must be PAGE_SIZE aligned on-disk
		 */
		if (first_block & (blocks_per_page - 1)) {
			probe_block++;
			goto reprobe;
		}

		for (block_in_page = 1; block_in_page < blocks_per_page;
					block_in_page++) {
			sector_t block;

			block = bmap(inode, probe_block + block_in_page);
			if (block == 0)
				goto bad_bmap;
			if (block != first_block + block_in_page) {
				/* Discontiguity */
				probe_block++;
				goto reprobe;
			}
		}

		first_block >>= (PAGE_SHIFT - blkbits);
		if (page_no) {	/* exclude the header page */
			if (first_block < lowest_block)
				lowest_block = first_block;
			if (first_block > highest_block)
				highest_block = first_block;
		}

		page_no++;
		probe_block += blocks_per_page;
reprobe:
		continue;
	}
	return 0;

bad_bmap:
	pr_err("swapon: swapfile has holes\n");
	return -EINVAL;
}

static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
				sector_t *span)
{
	struct inode *inode = file_inode(file);
	int ret;

	if (!S_ISREG(inode->i_mode))
		return -EINVAL;

	if (f2fs_readonly(F2FS_I_SB(inode)->sb))
		return -EROFS;

	ret = f2fs_convert_inline_inode(inode);
	if (ret)
		return ret;

	ret = check_swap_activate(file, sis->max);
	if (ret)
		return ret;

	set_inode_flag(inode, FI_PIN_FILE);
	f2fs_precache_extents(inode);
	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
	return 0;
}

static void f2fs_swap_deactivate(struct file *file)
{
	struct inode *inode = file_inode(file);

	clear_inode_flag(inode, FI_PIN_FILE);
}
#else
static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
				sector_t *span)
{
	return -EOPNOTSUPP;
}

static void f2fs_swap_deactivate(struct file *file)
{
}
#endif

const struct address_space_operations f2fs_dblock_aops = {
	.readpage	= f2fs_read_data_page,
	.readpages	= f2fs_read_data_pages,
	.writepage	= f2fs_write_data_page,
	.writepages	= f2fs_write_data_pages,
	.write_begin	= f2fs_write_begin,
	.write_end	= f2fs_write_end,
	.set_page_dirty	= f2fs_set_data_page_dirty,
	.invalidatepage	= f2fs_invalidate_page,
	.releasepage	= f2fs_release_page,
	.direct_IO	= f2fs_direct_IO,
	.bmap		= f2fs_bmap,
	.swap_activate  = f2fs_swap_activate,
	.swap_deactivate = f2fs_swap_deactivate,
#ifdef CONFIG_MIGRATION
	.migratepage    = f2fs_migrate_page,
#endif
};

void f2fs_clear_page_cache_dirty_tag(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	unsigned long flags;

	xa_lock_irqsave(&mapping->i_pages, flags);
	__xa_clear_mark(&mapping->i_pages, page_index(page),
						PAGECACHE_TAG_DIRTY);
	xa_unlock_irqrestore(&mapping->i_pages, flags);
}

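/*
 * The slab cache and mempool below back the per-bio post-read
 * (decryption) contexts; NUM_PREALLOC_POST_READ_CTXS contexts are
 * preallocated so reads can still make progress under memory
 * pressure.
 */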
int __init f2fs_init_post_read_processing(void)
{
	bio_post_read_ctx_cache = KMEM_CACHE(bio_post_read_ctx, 0);
	if (!bio_post_read_ctx_cache)
		goto fail;
	bio_post_read_ctx_pool =
		mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS,
					 bio_post_read_ctx_cache);
	if (!bio_post_read_ctx_pool)
		goto fail_free_cache;
	return 0;

fail_free_cache:
	kmem_cache_destroy(bio_post_read_ctx_cache);
fail:
	return -ENOMEM;
}

void __exit f2fs_destroy_post_read_processing(void)
{
	mempool_destroy(bio_post_read_ctx_pool);
	kmem_cache_destroy(bio_post_read_ctx_cache);
}