/*
 * fs/f2fs/data.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/prefetch.h>
#include <linux/uio.h>
#include <linux/cleancache.h>
#include <linux/sched/signal.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "trace.h"
#include <trace/events/f2fs.h>

#define NUM_PREALLOC_POST_READ_CTXS	128

static struct kmem_cache *bio_post_read_ctx_cache;
static mempool_t *bio_post_read_ctx_pool;

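/*
 * Writeback of these pages is guaranteed to be covered by a checkpoint:
 * meta/node inode pages, directory data, atomic-file data, and cold data.
 */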
static bool __is_cp_guaranteed(struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct inode *inode;
	struct f2fs_sb_info *sbi;

	if (!mapping)
		return false;

	inode = mapping->host;
	sbi = F2FS_I_SB(inode);

	if (inode->i_ino == F2FS_META_INO(sbi) ||
			inode->i_ino ==  F2FS_NODE_INO(sbi) ||
			S_ISDIR(inode->i_mode) ||
			(S_ISREG(inode->i_mode) &&
			is_inode_flag_set(inode, FI_ATOMIC_FILE)) ||
			is_cold_data(page))
		return true;
	return false;
}

/* postprocessing steps for read bios */
enum bio_post_read_step {
	STEP_INITIAL = 0,
	STEP_DECRYPT,
};

struct bio_post_read_ctx {
	struct bio *bio;
	struct work_struct work;
	unsigned int cur_step;
	unsigned int enabled_steps;
};

static void __read_end_io(struct bio *bio)
{
	struct page *page;
	struct bio_vec *bv;
	int i;

	bio_for_each_segment_all(bv, bio, i) {
		page = bv->bv_page;

		/* PG_error was set if any post_read step failed */
		if (bio->bi_status || PageError(page)) {
			ClearPageUptodate(page);
			SetPageError(page);
		} else {
			SetPageUptodate(page);
		}
		unlock_page(page);
	}
	if (bio->bi_private)
		mempool_free(bio->bi_private, bio_post_read_ctx_pool);
	bio_put(bio);
}

static void bio_post_read_processing(struct bio_post_read_ctx *ctx);

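/*
 * Walk the post-read pipeline: run the next enabled step (currently only
 * decryption, deferred to a workqueue) and finish the bio once no steps
 * remain.
 */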
static void decrypt_work(struct work_struct *work)
{
	struct bio_post_read_ctx *ctx =
		container_of(work, struct bio_post_read_ctx, work);

	fscrypt_decrypt_bio(ctx->bio);

	bio_post_read_processing(ctx);
}

static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
{
	switch (++ctx->cur_step) {
	case STEP_DECRYPT:
		if (ctx->enabled_steps & (1 << STEP_DECRYPT)) {
			INIT_WORK(&ctx->work, decrypt_work);
			fscrypt_enqueue_decrypt_work(&ctx->work);
			return;
		}
		ctx->cur_step++;
		/* fall-through */
	default:
		__read_end_io(ctx->bio);
	}
}

static bool f2fs_bio_post_read_required(struct bio *bio)
{
	return bio->bi_private && !bio->bi_status;
}

static void f2fs_read_end_io(struct bio *bio)
{
	if (time_to_inject(F2FS_P_SB(bio_first_page_all(bio)), FAULT_IO)) {
		f2fs_show_injection_info(FAULT_IO);
		bio->bi_status = BLK_STS_IOERR;
	}

	if (f2fs_bio_post_read_required(bio)) {
		struct bio_post_read_ctx *ctx = bio->bi_private;

		ctx->cur_step = STEP_INITIAL;
		bio_post_read_processing(ctx);
		return;
	}

	__read_end_io(bio);
}

static void f2fs_write_end_io(struct bio *bio)
{
	struct f2fs_sb_info *sbi = bio->bi_private;
	struct bio_vec *bvec;
	int i;

	bio_for_each_segment_all(bvec, bio, i) {
		struct page *page = bvec->bv_page;
		enum count_type type = WB_DATA_TYPE(page);

		if (IS_DUMMY_WRITTEN_PAGE(page)) {
			set_page_private(page, (unsigned long)NULL);
			ClearPagePrivate(page);
			unlock_page(page);
			mempool_free(page, sbi->write_io_dummy);

			if (unlikely(bio->bi_status))
				f2fs_stop_checkpoint(sbi, true);
			continue;
		}

		fscrypt_pullback_bio_page(&page, true);

		if (unlikely(bio->bi_status)) {
			mapping_set_error(page->mapping, -EIO);
			if (type == F2FS_WB_CP_DATA)
				f2fs_stop_checkpoint(sbi, true);
		}

		f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) &&
					page->index != nid_of_node(page));

		dec_page_count(sbi, type);
		if (f2fs_in_warm_node_list(sbi, page))
			f2fs_del_fsync_node_entry(sbi, page);
		clear_cold_data(page);
		end_page_writeback(page);
	}
	if (!get_pages(sbi, F2FS_WB_CP_DATA) &&
				wq_has_sleeper(&sbi->cp_wait))
		wake_up(&sbi->cp_wait);

	bio_put(bio);
}

/*
 * Return the block device that blk_addr belongs to; if @bio is given,
 * also redirect it to that device and the device-relative sector.
 */
struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
				block_t blk_addr, struct bio *bio)
{
	struct block_device *bdev = sbi->sb->s_bdev;
	int i;

	for (i = 0; i < sbi->s_ndevs; i++) {
		if (FDEV(i).start_blk <= blk_addr &&
					FDEV(i).end_blk >= blk_addr) {
			blk_addr -= FDEV(i).start_blk;
			bdev = FDEV(i).bdev;
			break;
		}
	}
	if (bio) {
		bio_set_dev(bio, bdev);
		bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
	}
	return bdev;
}

int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	int i;

	for (i = 0; i < sbi->s_ndevs; i++)
		if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
			return i;
	return 0;
}

static bool __same_bdev(struct f2fs_sb_info *sbi,
				block_t blk_addr, struct bio *bio)
{
	struct block_device *b = f2fs_target_device(sbi, blk_addr, NULL);
	return bio->bi_disk == b->bd_disk && bio->bi_partno == b->bd_partno;
}

/*
 * Low-level block read/write IO operations.
 */
static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
				struct writeback_control *wbc,
				int npages, bool is_read,
				enum page_type type, enum temp_type temp)
{
	struct bio *bio;

	bio = f2fs_bio_alloc(sbi, npages, true);

	f2fs_target_device(sbi, blk_addr, bio);
	if (is_read) {
		bio->bi_end_io = f2fs_read_end_io;
		bio->bi_private = NULL;
	} else {
		bio->bi_end_io = f2fs_write_end_io;
		bio->bi_private = sbi;
		bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, type, temp);
	}
	if (wbc)
		wbc_init_bio(wbc, bio);

	return bio;
}

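/*
 * When the io_bits mount option makes F2FS_IO_SIZE() larger than one block,
 * a partially filled write bio is padded below with zeroed dummy pages so
 * the device always sees fully aligned writes; f2fs_write_end_io() later
 * returns those pages to the write_io_dummy mempool.
 */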
static inline void __submit_bio(struct f2fs_sb_info *sbi,
				struct bio *bio, enum page_type type)
{
	if (!is_read_io(bio_op(bio))) {
		unsigned int start;

		if (type != DATA && type != NODE)
			goto submit_io;

		if (test_opt(sbi, LFS) && current->plug)
			blk_finish_plug(current->plug);

		start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
		start %= F2FS_IO_SIZE(sbi);

		if (start == 0)
			goto submit_io;

		/* fill dummy pages */
		for (; start < F2FS_IO_SIZE(sbi); start++) {
			struct page *page =
				mempool_alloc(sbi->write_io_dummy,
					GFP_NOIO | __GFP_ZERO | __GFP_NOFAIL);
			f2fs_bug_on(sbi, !page);

			SetPagePrivate(page);
			set_page_private(page, (unsigned long)DUMMY_WRITTEN_PAGE);
			lock_page(page);
			if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
				f2fs_bug_on(sbi, 1);
		}
		/*
		 * In the NODE case, we lose next block address chain. So, we
		 * need to do checkpoint in f2fs_sync_file.
		 */
		if (type == NODE)
			set_sbi_flag(sbi, SBI_NEED_CP);
	}
submit_io:
	if (is_read_io(bio_op(bio)))
		trace_f2fs_submit_read_bio(sbi->sb, type, bio);
	else
		trace_f2fs_submit_write_bio(sbi->sb, type, bio);
	submit_bio(bio);
}

static void __submit_merged_bio(struct f2fs_bio_info *io)
{
	struct f2fs_io_info *fio = &io->fio;

	if (!io->bio)
		return;

	bio_set_op_attrs(io->bio, fio->op, fio->op_flags);

	if (is_read_io(fio->op))
		trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
	else
		trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio);

	__submit_bio(io->sbi, io->bio, fio->type);
	io->bio = NULL;
}

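/*
 * Check whether the bio pending in @io already contains a page of the given
 * inode or inode number at index @idx (or any page at all when no owner is
 * given), so callers know if a merged write must be flushed first.
 */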
static bool __has_merged_page(struct f2fs_bio_info *io,
				struct inode *inode, nid_t ino, pgoff_t idx)
{
	struct bio_vec *bvec;
	struct page *target;
	int i;

	if (!io->bio)
		return false;

	if (!inode && !ino)
		return true;

	bio_for_each_segment_all(bvec, io->bio, i) {

		if (bvec->bv_page->mapping)
			target = bvec->bv_page;
		else
			target = fscrypt_control_page(bvec->bv_page);

		if (idx != target->index)
			continue;

		if (inode && inode == target->mapping->host)
			return true;
		if (ino && ino == ino_of_node(target))
			return true;
	}

	return false;
}

static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode,
				nid_t ino, pgoff_t idx, enum page_type type)
{
	enum page_type btype = PAGE_TYPE_OF_BIO(type);
	enum temp_type temp;
	struct f2fs_bio_info *io;
	bool ret = false;

	for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
		io = sbi->write_io[btype] + temp;

		down_read(&io->io_rwsem);
		ret = __has_merged_page(io, inode, ino, idx);
		up_read(&io->io_rwsem);

		/* TODO: use HOT temp only for meta pages now. */
		if (ret || btype == META)
			break;
	}
	return ret;
}

static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
				enum page_type type, enum temp_type temp)
{
	enum page_type btype = PAGE_TYPE_OF_BIO(type);
	struct f2fs_bio_info *io = sbi->write_io[btype] + temp;

	down_write(&io->io_rwsem);

	/* change META to META_FLUSH in the checkpoint procedure */
	if (type >= META_FLUSH) {
		io->fio.type = META_FLUSH;
		io->fio.op = REQ_OP_WRITE;
		io->fio.op_flags = REQ_META | REQ_PRIO | REQ_SYNC;
		if (!test_opt(sbi, NOBARRIER))
			io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
	}
	__submit_merged_bio(io);
	up_write(&io->io_rwsem);
}

static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
				struct inode *inode, nid_t ino, pgoff_t idx,
				enum page_type type, bool force)
{
	enum temp_type temp;

	if (!force && !has_merged_page(sbi, inode, ino, idx, type))
		return;

	for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {

		__f2fs_submit_merged_write(sbi, type, temp);

		/* TODO: use HOT temp only for meta pages now. */
		if (type >= META)
			break;
	}
}

void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
{
	__submit_merged_write_cond(sbi, NULL, 0, 0, type, true);
}

void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
				struct inode *inode, nid_t ino, pgoff_t idx,
				enum page_type type)
{
	__submit_merged_write_cond(sbi, inode, ino, idx, type, false);
}

void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
{
	f2fs_submit_merged_write(sbi, DATA);
	f2fs_submit_merged_write(sbi, NODE);
	f2fs_submit_merged_write(sbi, META);
}

/*
 * Fill the locked page with data located in the block address.
 * A caller needs to unlock the page on failure.
 */
int f2fs_submit_page_bio(struct f2fs_io_info *fio)
{
	struct bio *bio;
	struct page *page = fio->encrypted_page ?
			fio->encrypted_page : fio->page;

	if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
			__is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
		return -EFAULT;

	trace_f2fs_submit_page_bio(page, fio);
	f2fs_trace_ios(fio, 0);

	/* Allocate a new bio */
	bio = __bio_alloc(fio->sbi, fio->new_blkaddr, fio->io_wbc,
				1, is_read_io(fio->op), fio->type, fio->temp);

	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
		bio_put(bio);
		return -EFAULT;
	}
	bio_set_op_attrs(bio, fio->op, fio->op_flags);

	__submit_bio(fio->sbi, bio, fio->type);

	if (!is_read_io(fio->op))
		inc_page_count(fio->sbi, WB_DATA_TYPE(fio->page));
	return 0;
}

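/*
 * Merged write path: pages of the same temperature share one open bio under
 * io_rwsem, which is flushed only when the next block is discontiguous, the
 * op/op_flags differ, or the target device changes.
 */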
void f2fs_submit_page_write(struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
	struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
	struct page *bio_page;

	f2fs_bug_on(sbi, is_read_io(fio->op));

	down_write(&io->io_rwsem);
next:
	if (fio->in_list) {
		spin_lock(&io->io_lock);
		if (list_empty(&io->io_list)) {
			spin_unlock(&io->io_lock);
			goto out;
		}
		fio = list_first_entry(&io->io_list,
						struct f2fs_io_info, list);
		list_del(&fio->list);
		spin_unlock(&io->io_lock);
	}

	if (__is_valid_data_blkaddr(fio->old_blkaddr))
		verify_block_addr(fio, fio->old_blkaddr);
	verify_block_addr(fio, fio->new_blkaddr);

	bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;

	/* set submitted = true as a return value */
	fio->submitted = true;

	inc_page_count(sbi, WB_DATA_TYPE(bio_page));

	if (io->bio && (io->last_block_in_bio != fio->new_blkaddr - 1 ||
	    (io->fio.op != fio->op || io->fio.op_flags != fio->op_flags) ||
			!__same_bdev(sbi, fio->new_blkaddr, io->bio)))
		__submit_merged_bio(io);
alloc_new:
	if (io->bio == NULL) {
		if ((fio->type == DATA || fio->type == NODE) &&
				fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
			dec_page_count(sbi, WB_DATA_TYPE(bio_page));
			fio->retry = true;
			goto skip;
		}
		io->bio = __bio_alloc(sbi, fio->new_blkaddr, fio->io_wbc,
						BIO_MAX_PAGES, false,
						fio->type, fio->temp);
		io->fio = *fio;
	}

	if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) {
		__submit_merged_bio(io);
		goto alloc_new;
	}

	if (fio->io_wbc)
		wbc_account_io(fio->io_wbc, bio_page, PAGE_SIZE);

	io->last_block_in_bio = fio->new_blkaddr;
	f2fs_trace_ios(fio, 0);

	trace_f2fs_submit_page_write(fio->page, fio);
skip:
	if (fio->in_list)
		goto next;
out:
	up_write(&io->io_rwsem);
}

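/*
 * Grab a read bio and, when post-read work such as decryption is needed,
 * attach a bio_post_read_ctx so f2fs_read_end_io() can run those steps from
 * a workqueue.
 */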
static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
					unsigned nr_pages, unsigned op_flag)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct bio *bio;
	struct bio_post_read_ctx *ctx;
	unsigned int post_read_steps = 0;

	if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC))
		return ERR_PTR(-EFAULT);

	bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false);
	if (!bio)
		return ERR_PTR(-ENOMEM);
	f2fs_target_device(sbi, blkaddr, bio);
	bio->bi_end_io = f2fs_read_end_io;
	bio_set_op_attrs(bio, REQ_OP_READ, op_flag);

	if (f2fs_encrypted_file(inode))
		post_read_steps |= 1 << STEP_DECRYPT;
	if (post_read_steps) {
		ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
		if (!ctx) {
			bio_put(bio);
			return ERR_PTR(-ENOMEM);
		}
		ctx->bio = bio;
		ctx->enabled_steps = post_read_steps;
		bio->bi_private = ctx;

		/* wait for the page to be moved by cleaning */
		f2fs_wait_on_block_writeback(sbi, blkaddr);
	}

	return bio;
}

/* This can handle encryption stuffs */
static int f2fs_submit_page_read(struct inode *inode, struct page *page,
							block_t blkaddr)
{
	struct bio *bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0);

	if (IS_ERR(bio))
		return PTR_ERR(bio);

	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
		bio_put(bio);
		return -EFAULT;
	}
	__submit_bio(F2FS_I_SB(inode), bio, DATA);
	return 0;
}

static void __set_data_blkaddr(struct dnode_of_data *dn)
{
	struct f2fs_node *rn = F2FS_NODE(dn->node_page);
	__le32 *addr_array;
	int base = 0;

	if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode))
		base = get_extra_isize(dn->inode);

	/* Get physical address of data block */
	addr_array = blkaddr_in_node(rn);
	addr_array[base + dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
}

/*
 * Lock ordering for the change of data block address:
 * ->data_page
 *  ->node_page
 *    update block addresses in the node page
 */
void f2fs_set_data_blkaddr(struct dnode_of_data *dn)
{
	f2fs_wait_on_page_writeback(dn->node_page, NODE, true);
	__set_data_blkaddr(dn);
	if (set_page_dirty(dn->node_page))
		dn->node_changed = true;
}

void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
{
	dn->data_blkaddr = blkaddr;
	f2fs_set_data_blkaddr(dn);
	f2fs_update_extent_cache(dn);
}

/* dn->ofs_in_node will be returned with up-to-date last block pointer */
int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	int err;

	if (!count)
		return 0;

	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
		return -EPERM;
	if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
		return err;

	trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
						dn->ofs_in_node, count);

	f2fs_wait_on_page_writeback(dn->node_page, NODE, true);

	for (; count > 0; dn->ofs_in_node++) {
		block_t blkaddr = datablock_addr(dn->inode,
					dn->node_page, dn->ofs_in_node);
		if (blkaddr == NULL_ADDR) {
			dn->data_blkaddr = NEW_ADDR;
			__set_data_blkaddr(dn);
			count--;
		}
	}

	if (set_page_dirty(dn->node_page))
		dn->node_changed = true;
	return 0;
}

/* Should keep dn->ofs_in_node unchanged */
int f2fs_reserve_new_block(struct dnode_of_data *dn)
{
	unsigned int ofs_in_node = dn->ofs_in_node;
	int ret;

	ret = f2fs_reserve_new_blocks(dn, 1);
	dn->ofs_in_node = ofs_in_node;
	return ret;
}

int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
{
	bool need_put = dn->inode_page ? false : true;
	int err;

	err = f2fs_get_dnode_of_data(dn, index, ALLOC_NODE);
	if (err)
		return err;

	if (dn->data_blkaddr == NULL_ADDR)
		err = f2fs_reserve_new_block(dn);
	if (err || need_put)
		f2fs_put_dnode(dn);
	return err;
}

int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
{
	struct extent_info ei = {0,0,0};
	struct inode *inode = dn->inode;

	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
		dn->data_blkaddr = ei.blk + index - ei.fofs;
		return 0;
	}

	return f2fs_reserve_block(dn, index);
}

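/*
 * Return a data page for @index, issuing a read when it is not uptodate;
 * the page comes back locked while the read is in flight, and unlocked when
 * it could be served from cache or a zeroed NEW_ADDR block.
 */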
struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
						int op_flags, bool for_write)
{
	struct address_space *mapping = inode->i_mapping;
	struct dnode_of_data dn;
	struct page *page;
	struct extent_info ei = {0,0,0};
	int err;

	page = f2fs_grab_cache_page(mapping, index, for_write);
	if (!page)
		return ERR_PTR(-ENOMEM);

	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
		dn.data_blkaddr = ei.blk + index - ei.fofs;
		goto got_it;
	}

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
	if (err)
		goto put_err;
	f2fs_put_dnode(&dn);

	if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
		err = -ENOENT;
		goto put_err;
	}
got_it:
	if (PageUptodate(page)) {
		unlock_page(page);
		return page;
	}

	/*
	 * A new dentry page is allocated but not able to be written, since its
	 * new inode page couldn't be allocated due to -ENOSPC.
	 * In such a case, its blkaddr may remain NEW_ADDR.
	 * see, f2fs_add_link -> f2fs_get_new_data_page ->
	 * f2fs_init_inode_metadata.
	 */
	if (dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_SIZE);
		if (!PageUptodate(page))
			SetPageUptodate(page);
		unlock_page(page);
		return page;
	}

	err = f2fs_submit_page_read(inode, page, dn.data_blkaddr);
	if (err)
		goto put_err;
	return page;

put_err:
	f2fs_put_page(page, 1);
	return ERR_PTR(err);
}

struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;

	page = find_get_page(mapping, index);
	if (page && PageUptodate(page))
		return page;
	f2fs_put_page(page, 0);

	page = f2fs_get_read_data_page(inode, index, 0, false);
	if (IS_ERR(page))
		return page;

	if (PageUptodate(page))
		return page;

	wait_on_page_locked(page);
	if (unlikely(!PageUptodate(page))) {
		f2fs_put_page(page, 0);
		return ERR_PTR(-EIO);
	}
	return page;
}

/*
 * If it tries to access a hole, return an error.
 * Because the callers (functions in dir.c and GC) need to know
 * whether this page exists or not.
 */
struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
							bool for_write)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
repeat:
	page = f2fs_get_read_data_page(inode, index, 0, for_write);
	if (IS_ERR(page))
		return page;

	/* wait for read completion */
	lock_page(page);
	if (unlikely(page->mapping != mapping)) {
		f2fs_put_page(page, 1);
		goto repeat;
	}
	if (unlikely(!PageUptodate(page))) {
		f2fs_put_page(page, 1);
		return ERR_PTR(-EIO);
	}
	return page;
}

/*
 * Caller ensures that this data page is never allocated.
 * A new zero-filled data page is allocated in the page cache.
 *
 * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
 * f2fs_unlock_op().
 * Note that, ipage is set only by make_empty_dir, and if any error occurs,
 * ipage should be released by this function.
 */
struct page *f2fs_get_new_data_page(struct inode *inode,
		struct page *ipage, pgoff_t index, bool new_i_size)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
	struct dnode_of_data dn;
	int err;

	page = f2fs_grab_cache_page(mapping, index, true);
	if (!page) {
		/*
		 * before exiting, we should make sure ipage will be released
		 * if any error occurs.
		 */
		f2fs_put_page(ipage, 1);
		return ERR_PTR(-ENOMEM);
	}

	set_new_dnode(&dn, inode, ipage, NULL, 0);
	err = f2fs_reserve_block(&dn, index);
	if (err) {
		f2fs_put_page(page, 1);
		return ERR_PTR(err);
	}
	if (!ipage)
		f2fs_put_dnode(&dn);

	if (PageUptodate(page))
		goto got_it;

	if (dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_SIZE);
		if (!PageUptodate(page))
			SetPageUptodate(page);
	} else {
		f2fs_put_page(page, 1);

		/* if ipage exists, blkaddr should be NEW_ADDR */
		f2fs_bug_on(F2FS_I_SB(inode), ipage);
		page = f2fs_get_lock_data_page(inode, index, true);
		if (IS_ERR(page))
			return page;
	}
got_it:
	if (new_i_size && i_size_read(inode) <
				((loff_t)(index + 1) << PAGE_SHIFT))
		f2fs_i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT));
	return page;
}

static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	struct f2fs_summary sum;
	struct node_info ni;
	block_t old_blkaddr;
	pgoff_t fofs;
	blkcnt_t count = 1;
	int err;

	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
		return -EPERM;

	err = f2fs_get_node_info(sbi, dn->nid, &ni);
	if (err)
		return err;

	dn->data_blkaddr = datablock_addr(dn->inode,
				dn->node_page, dn->ofs_in_node);
	if (dn->data_blkaddr == NEW_ADDR)
		goto alloc;

	if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
		return err;

alloc:
	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
	old_blkaddr = dn->data_blkaddr;
	f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
					&sum, seg_type, NULL, false);
	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
		invalidate_mapping_pages(META_MAPPING(sbi),
					old_blkaddr, old_blkaddr);
	f2fs_set_data_blkaddr(dn);

	/* update i_size */
	fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
							dn->ofs_in_node;
	if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_SHIFT))
		f2fs_i_size_write(dn->inode,
				((loff_t)(fofs + 1) << PAGE_SHIFT));
	return 0;
}

int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct f2fs_map_blocks map;
	int flag;
	int err = 0;
	bool direct_io = iocb->ki_flags & IOCB_DIRECT;

	/* convert inline data for Direct I/O */
	if (direct_io) {
		err = f2fs_convert_inline_inode(inode);
		if (err)
			return err;
	}

	if (is_inode_flag_set(inode, FI_NO_PREALLOC))
		return 0;

	map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
	map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from));
	if (map.m_len > map.m_lblk)
		map.m_len -= map.m_lblk;
	else
		map.m_len = 0;

	map.m_next_pgofs = NULL;
	map.m_next_extent = NULL;
	map.m_seg_type = NO_CHECK_TYPE;

	if (direct_io) {
		map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint);
		flag = f2fs_force_buffered_io(inode, WRITE) ?
					F2FS_GET_BLOCK_PRE_AIO :
					F2FS_GET_BLOCK_PRE_DIO;
		goto map_blocks;
	}
	if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) {
		err = f2fs_convert_inline_inode(inode);
		if (err)
			return err;
	}
	if (f2fs_has_inline_data(inode))
		return err;

	flag = F2FS_GET_BLOCK_PRE_AIO;

map_blocks:
	err = f2fs_map_blocks(inode, &map, 1, flag);
	if (map.m_len > 0 && err == -ENOSPC) {
		if (!direct_io)
			set_inode_flag(inode, FI_NO_PREALLOC);
		err = 0;
	}
	return err;
}

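/*
 * F2FS_GET_BLOCK_PRE_AIO batches preallocation per dnode page and only needs
 * the lighter node_change read lock; the other flags take f2fs_lock_op() to
 * exclude checkpoint while blocks are allocated.
 */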
static inline void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
{
	if (flag == F2FS_GET_BLOCK_PRE_AIO) {
		if (lock)
			down_read(&sbi->node_change);
		else
			up_read(&sbi->node_change);
	} else {
		if (lock)
			f2fs_lock_op(sbi);
		else
			f2fs_unlock_op(sbi);
	}
}

/*
 * f2fs_map_blocks() now supports readahead/bmap/rw direct_IO with the
 * f2fs_map_blocks structure.
 * If original data blocks are allocated, then give them to blockdev.
 * Otherwise,
 *     a. preallocate requested block addresses
 *     b. do not use extent cache for better performance
 *     c. give the block addresses to blockdev
 */
int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
						int create, int flag)
{
	unsigned int maxblocks = map->m_len;
	struct dnode_of_data dn;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	int mode = create ? ALLOC_NODE : LOOKUP_NODE;
	pgoff_t pgofs, end_offset, end;
	int err = 0, ofs = 1;
	unsigned int ofs_in_node, last_ofs_in_node;
	blkcnt_t prealloc;
	struct extent_info ei = {0,0,0};
	block_t blkaddr;
	unsigned int start_pgofs;

	if (!maxblocks)
		return 0;

	map->m_len = 0;
	map->m_flags = 0;

	/* it only supports block size == page size */
	pgofs = (pgoff_t)map->m_lblk;
	end = pgofs + maxblocks;

	if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
		map->m_pblk = ei.blk + pgofs - ei.fofs;
		map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
		map->m_flags = F2FS_MAP_MAPPED;
		if (map->m_next_extent)
			*map->m_next_extent = pgofs + map->m_len;
		goto out;
	}

next_dnode:
	if (create)
		__do_map_lock(sbi, flag, true);

	/* When reading holes, we need its node page */
	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_get_dnode_of_data(&dn, pgofs, mode);
	if (err) {
		if (flag == F2FS_GET_BLOCK_BMAP)
			map->m_pblk = 0;
		if (err == -ENOENT) {
			err = 0;
			if (map->m_next_pgofs)
				*map->m_next_pgofs =
					f2fs_get_next_page_offset(&dn, pgofs);
			if (map->m_next_extent)
				*map->m_next_extent =
					f2fs_get_next_page_offset(&dn, pgofs);
		}
		goto unlock_out;
	}

	start_pgofs = pgofs;
	prealloc = 0;
	last_ofs_in_node = ofs_in_node = dn.ofs_in_node;
	end_offset = ADDRS_PER_PAGE(dn.node_page, inode);

next_block:
	blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node);

	if (__is_valid_data_blkaddr(blkaddr) &&
		!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
		err = -EFAULT;
		goto sync_out;
	}

	if (!is_valid_data_blkaddr(sbi, blkaddr)) {
		if (create) {
			if (unlikely(f2fs_cp_error(sbi))) {
				err = -EIO;
				goto sync_out;
			}
			if (flag == F2FS_GET_BLOCK_PRE_AIO) {
				if (blkaddr == NULL_ADDR) {
					prealloc++;
					last_ofs_in_node = dn.ofs_in_node;
				}
			} else {
				err = __allocate_data_block(&dn,
							map->m_seg_type);
				if (!err)
					set_inode_flag(inode, FI_APPEND_WRITE);
			}
			if (err)
				goto sync_out;
			map->m_flags |= F2FS_MAP_NEW;
			blkaddr = dn.data_blkaddr;
		} else {
			if (flag == F2FS_GET_BLOCK_BMAP) {
				map->m_pblk = 0;
				goto sync_out;
			}
			if (flag == F2FS_GET_BLOCK_PRECACHE)
				goto sync_out;
			if (flag == F2FS_GET_BLOCK_FIEMAP &&
						blkaddr == NULL_ADDR) {
				if (map->m_next_pgofs)
					*map->m_next_pgofs = pgofs + 1;
				goto sync_out;
			}
			if (flag != F2FS_GET_BLOCK_FIEMAP) {
				/* for defragment case */
				if (map->m_next_pgofs)
					*map->m_next_pgofs = pgofs + 1;
				goto sync_out;
			}
		}
	}

	if (flag == F2FS_GET_BLOCK_PRE_AIO)
		goto skip;

	if (map->m_len == 0) {
		/* preallocated unwritten block should be mapped for fiemap. */
		if (blkaddr == NEW_ADDR)
			map->m_flags |= F2FS_MAP_UNWRITTEN;
		map->m_flags |= F2FS_MAP_MAPPED;

		map->m_pblk = blkaddr;
		map->m_len = 1;
	} else if ((map->m_pblk != NEW_ADDR &&
			blkaddr == (map->m_pblk + ofs)) ||
			(map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
			flag == F2FS_GET_BLOCK_PRE_DIO) {
		ofs++;
		map->m_len++;
	} else {
		goto sync_out;
	}

skip:
	dn.ofs_in_node++;
	pgofs++;

	/* preallocate blocks in batch for one dnode page */
	if (flag == F2FS_GET_BLOCK_PRE_AIO &&
			(pgofs == end || dn.ofs_in_node == end_offset)) {

		dn.ofs_in_node = ofs_in_node;
		err = f2fs_reserve_new_blocks(&dn, prealloc);
		if (err)
			goto sync_out;

		map->m_len += dn.ofs_in_node - ofs_in_node;
		if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
			err = -ENOSPC;
			goto sync_out;
		}
		dn.ofs_in_node = end_offset;
	}

	if (pgofs >= end)
		goto sync_out;
	else if (dn.ofs_in_node < end_offset)
		goto next_block;

	if (flag == F2FS_GET_BLOCK_PRECACHE) {
		if (map->m_flags & F2FS_MAP_MAPPED) {
			unsigned int ofs = start_pgofs - map->m_lblk;

			f2fs_update_extent_cache_range(&dn,
				start_pgofs, map->m_pblk + ofs,
				map->m_len - ofs);
		}
	}

	f2fs_put_dnode(&dn);

	if (create) {
		__do_map_lock(sbi, flag, false);
		f2fs_balance_fs(sbi, dn.node_changed);
	}
	goto next_dnode;

sync_out:
	if (flag == F2FS_GET_BLOCK_PRECACHE) {
		if (map->m_flags & F2FS_MAP_MAPPED) {
			unsigned int ofs = start_pgofs - map->m_lblk;

			f2fs_update_extent_cache_range(&dn,
				start_pgofs, map->m_pblk + ofs,
				map->m_len - ofs);
		}
		if (map->m_next_extent)
			*map->m_next_extent = pgofs + 1;
	}
	f2fs_put_dnode(&dn);
unlock_out:
	if (create) {
		__do_map_lock(sbi, flag, false);
		f2fs_balance_fs(sbi, dn.node_changed);
	}
out:
	trace_f2fs_map_blocks(inode, map, err);
	return err;
}

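/*
 * Report whether [pos, pos + len) is fully mapped, i.e. a write there is a
 * pure overwrite that needs no new block allocation.
 */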
bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len)
{
	struct f2fs_map_blocks map;
	block_t last_lblk;
	int err;

	if (pos + len > i_size_read(inode))
		return false;

	map.m_lblk = F2FS_BYTES_TO_BLK(pos);
	map.m_next_pgofs = NULL;
	map.m_next_extent = NULL;
	map.m_seg_type = NO_CHECK_TYPE;
	last_lblk = F2FS_BLK_ALIGN(pos + len);

	while (map.m_lblk < last_lblk) {
		map.m_len = last_lblk - map.m_lblk;
		err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT);
		if (err || map.m_len == 0)
			return false;
		map.m_lblk += map.m_len;
	}
	return true;
}

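/*
 * buffer_head-based wrappers around f2fs_map_blocks() for the generic
 * direct-IO, bmap and fiemap paths.
 */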
static int __get_data_block(struct inode *inode, sector_t iblock,
			struct buffer_head *bh, int create, int flag,
			pgoff_t *next_pgofs, int seg_type)
{
	struct f2fs_map_blocks map;
	int err;

	map.m_lblk = iblock;
	map.m_len = bh->b_size >> inode->i_blkbits;
	map.m_next_pgofs = next_pgofs;
	map.m_next_extent = NULL;
	map.m_seg_type = seg_type;

	err = f2fs_map_blocks(inode, &map, create, flag);
	if (!err) {
		map_bh(bh, inode->i_sb, map.m_pblk);
		bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
		bh->b_size = (u64)map.m_len << inode->i_blkbits;
	}
	return err;
}

static int get_data_block(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create, int flag,
			pgoff_t *next_pgofs)
{
	return __get_data_block(inode, iblock, bh_result, create,
							flag, next_pgofs,
							NO_CHECK_TYPE);
}

static int get_data_block_dio(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create)
{
	return __get_data_block(inode, iblock, bh_result, create,
						F2FS_GET_BLOCK_DEFAULT, NULL,
						f2fs_rw_hint_to_seg_type(
							inode->i_write_hint));
}

static int get_data_block_bmap(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create)
{
	/* Block number less than F2FS MAX BLOCKS */
	if (unlikely(iblock >= F2FS_I_SB(inode)->max_file_blocks))
		return -EFBIG;

	return __get_data_block(inode, iblock, bh_result, create,
						F2FS_GET_BLOCK_BMAP, NULL,
						NO_CHECK_TYPE);
}

static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
{
	return (offset >> inode->i_blkbits);
}

static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
{
	return (blk << inode->i_blkbits);
}

static int f2fs_xattr_fiemap(struct inode *inode,
				struct fiemap_extent_info *fieinfo)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct page *page;
	struct node_info ni;
	__u64 phys = 0, len;
	__u32 flags;
	nid_t xnid = F2FS_I(inode)->i_xattr_nid;
	int err = 0;

	if (f2fs_has_inline_xattr(inode)) {
		int offset;

		page = f2fs_grab_cache_page(NODE_MAPPING(sbi),
						inode->i_ino, false);
		if (!page)
			return -ENOMEM;

		err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
		if (err) {
			f2fs_put_page(page, 1);
			return err;
		}

		phys = (__u64)blk_to_logical(inode, ni.blk_addr);
		offset = offsetof(struct f2fs_inode, i_addr) +
					sizeof(__le32) * (DEF_ADDRS_PER_INODE -
					get_inline_xattr_addrs(inode));

		phys += offset;
		len = inline_xattr_size(inode);

		f2fs_put_page(page, 1);

		flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED;

		if (!xnid)
			flags |= FIEMAP_EXTENT_LAST;

		err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
		if (err || err == 1)
			return err;
	}

	if (xnid) {
		page = f2fs_grab_cache_page(NODE_MAPPING(sbi), xnid, false);
		if (!page)
			return -ENOMEM;

		err = f2fs_get_node_info(sbi, xnid, &ni);
		if (err) {
			f2fs_put_page(page, 1);
			return err;
		}

		phys = (__u64)blk_to_logical(inode, ni.blk_addr);
		len = inode->i_sb->s_blocksize;

		f2fs_put_page(page, 1);

		flags = FIEMAP_EXTENT_LAST;
	}

	if (phys)
		err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);

	return (err < 0 ? err : 0);
}

int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		u64 start, u64 len)
{
	struct buffer_head map_bh;
	sector_t start_blk, last_blk;
	pgoff_t next_pgofs;
	u64 logical = 0, phys = 0, size = 0;
	u32 flags = 0;
	int ret = 0;

	if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
		ret = f2fs_precache_extents(inode);
		if (ret)
			return ret;
	}

	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR);
	if (ret)
		return ret;

	inode_lock(inode);

	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
		ret = f2fs_xattr_fiemap(inode, fieinfo);
		goto out;
	}

	if (f2fs_has_inline_data(inode)) {
		ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
		if (ret != -EAGAIN)
			goto out;
	}

	if (logical_to_blk(inode, len) == 0)
		len = blk_to_logical(inode, 1);

	start_blk = logical_to_blk(inode, start);
	last_blk = logical_to_blk(inode, start + len - 1);

next:
	memset(&map_bh, 0, sizeof(struct buffer_head));
	map_bh.b_size = len;

	ret = get_data_block(inode, start_blk, &map_bh, 0,
					F2FS_GET_BLOCK_FIEMAP, &next_pgofs);
	if (ret)
		goto out;

	/* HOLE */
	if (!buffer_mapped(&map_bh)) {
		start_blk = next_pgofs;

		if (blk_to_logical(inode, start_blk) < blk_to_logical(inode,
					F2FS_I_SB(inode)->max_file_blocks))
			goto prep_next;

		flags |= FIEMAP_EXTENT_LAST;
	}

	if (size) {
		if (f2fs_encrypted_inode(inode))
			flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;

		ret = fiemap_fill_next_extent(fieinfo, logical,
				phys, size, flags);
	}

	if (start_blk > last_blk || ret)
		goto out;

	logical = blk_to_logical(inode, start_blk);
	phys = blk_to_logical(inode, map_bh.b_blocknr);
	size = map_bh.b_size;
	flags = 0;
	if (buffer_unwritten(&map_bh))
		flags = FIEMAP_EXTENT_UNWRITTEN;

	start_blk += logical_to_blk(inode, size);

prep_next:
	cond_resched();
	if (fatal_signal_pending(current))
		ret = -EINTR;
	else
		goto next;
out:
	if (ret == 1)
		ret = 0;

	inode_unlock(inode);
	return ret;
}

/*
 * This function was originally taken from fs/mpage.c, and customized for f2fs.
 * Major change was from block_size == page_size in f2fs by default.
 *
 * Note that the aops->readpages() function is ONLY used for read-ahead. If
 * this function ever deviates from doing just read-ahead, it should either
 * use ->readpage() or do the necessary surgery to decouple ->readpages()
 * from read-ahead.
 */
static int f2fs_mpage_readpages(struct address_space *mapping,
			struct list_head *pages, struct page *page,
			unsigned nr_pages, bool is_readahead)
{
	struct bio *bio = NULL;
	sector_t last_block_in_bio = 0;
	struct inode *inode = mapping->host;
	const unsigned blkbits = inode->i_blkbits;
	const unsigned blocksize = 1 << blkbits;
	sector_t block_in_file;
	sector_t last_block;
	sector_t last_block_in_file;
	sector_t block_nr;
	struct f2fs_map_blocks map;

	map.m_pblk = 0;
	map.m_lblk = 0;
	map.m_len = 0;
	map.m_flags = 0;
	map.m_next_pgofs = NULL;
	map.m_next_extent = NULL;
	map.m_seg_type = NO_CHECK_TYPE;

	for (; nr_pages; nr_pages--) {
		if (pages) {
			page = list_last_entry(pages, struct page, lru);

			prefetchw(&page->flags);
			list_del(&page->lru);
			if (add_to_page_cache_lru(page, mapping,
						  page->index,
						  readahead_gfp_mask(mapping)))
				goto next_page;
		}

		block_in_file = (sector_t)page->index;
		last_block = block_in_file + nr_pages;
		last_block_in_file = (i_size_read(inode) + blocksize - 1) >>
								blkbits;
		if (last_block > last_block_in_file)
			last_block = last_block_in_file;

		/*
		 * Map blocks using the previous result first.
		 */
		if ((map.m_flags & F2FS_MAP_MAPPED) &&
				block_in_file > map.m_lblk &&
				block_in_file < (map.m_lblk + map.m_len))
			goto got_it;

		/*
		 * Then do more f2fs_map_blocks() calls until we are
		 * done with this page.
		 */
		map.m_flags = 0;

		if (block_in_file < last_block) {
			map.m_lblk = block_in_file;
			map.m_len = last_block - block_in_file;

			if (f2fs_map_blocks(inode, &map, 0,
						F2FS_GET_BLOCK_DEFAULT))
				goto set_error_page;
		}
got_it:
		if ((map.m_flags & F2FS_MAP_MAPPED)) {
			block_nr = map.m_pblk + block_in_file - map.m_lblk;
			SetPageMappedToDisk(page);

			if (!PageUptodate(page) && !cleancache_get_page(page)) {
				SetPageUptodate(page);
				goto confused;
			}

			if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
								DATA_GENERIC))
				goto set_error_page;
		} else {
			zero_user_segment(page, 0, PAGE_SIZE);
			if (!PageUptodate(page))
				SetPageUptodate(page);
			unlock_page(page);
			goto next_page;
		}

		/*
		 * This page will go to BIO.  Do we need to send this
		 * BIO off first?
		 */
		if (bio && (last_block_in_bio != block_nr - 1 ||
			!__same_bdev(F2FS_I_SB(inode), block_nr, bio))) {
submit_and_realloc:
			__submit_bio(F2FS_I_SB(inode), bio, DATA);
			bio = NULL;
		}
		if (bio == NULL) {
			bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
					is_readahead ? REQ_RAHEAD : 0);
			if (IS_ERR(bio)) {
				bio = NULL;
				goto set_error_page;
			}
		}

		if (bio_add_page(bio, page, blocksize, 0) < blocksize)
			goto submit_and_realloc;

		last_block_in_bio = block_nr;
		goto next_page;
set_error_page:
		SetPageError(page);
		zero_user_segment(page, 0, PAGE_SIZE);
		unlock_page(page);
		goto next_page;
confused:
		if (bio) {
			__submit_bio(F2FS_I_SB(inode), bio, DATA);
			bio = NULL;
		}
		unlock_page(page);
next_page:
		if (pages)
			put_page(page);
	}
	BUG_ON(pages && !list_empty(pages));
	if (bio)
		__submit_bio(F2FS_I_SB(inode), bio, DATA);
	return 0;
}

static int f2fs_read_data_page(struct file *file, struct page *page)
{
	struct inode *inode = page->mapping->host;
	int ret = -EAGAIN;

	trace_f2fs_readpage(page, DATA);

	/* If the file has inline data, try to read it directly */
	if (f2fs_has_inline_data(inode))
		ret = f2fs_read_inline_data(inode, page);
	if (ret == -EAGAIN)
		ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1, false);
	return ret;
}

static int f2fs_read_data_pages(struct file *file,
			struct address_space *mapping,
			struct list_head *pages, unsigned nr_pages)
{
	struct inode *inode = mapping->host;
	struct page *page = list_last_entry(pages, struct page, lru);

	trace_f2fs_readpages(inode, page, nr_pages);

	/* If the file has inline data, skip readpages */
	if (f2fs_has_inline_data(inode))
		return 0;

	return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages, true);
}

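/*
 * Encrypt one data page into fio->encrypted_page before write-out; on
 * transient ENOMEM, flush pending merged writes and retry with __GFP_NOFAIL.
 */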
static int encrypt_one_page(struct f2fs_io_info *fio)
{
	struct inode *inode = fio->page->mapping->host;
	struct page *mpage;
	gfp_t gfp_flags = GFP_NOFS;

	if (!f2fs_encrypted_file(inode))
		return 0;

	/* wait for GCed page writeback via META_MAPPING */
	f2fs_wait_on_block_writeback(fio->sbi, fio->old_blkaddr);

retry_encrypt:
	fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page,
			PAGE_SIZE, 0, fio->page->index, gfp_flags);
	if (IS_ERR(fio->encrypted_page)) {
		/* flush pending IOs and wait for a while in the ENOMEM case */
		if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
			f2fs_flush_merged_writes(fio->sbi);
			congestion_wait(BLK_RW_ASYNC, HZ/50);
			gfp_flags |= __GFP_NOFAIL;
			goto retry_encrypt;
		}
		return PTR_ERR(fio->encrypted_page);
	}

	mpage = find_lock_page(META_MAPPING(fio->sbi), fio->old_blkaddr);
	if (mpage) {
		if (PageUptodate(mpage))
			memcpy(page_address(mpage),
				page_address(fio->encrypted_page), PAGE_SIZE);
		f2fs_put_page(mpage, 1);
	}
	return 0;
}

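/*
 * In-place-update policy: each F2FS_IPU_* bit in ipu_policy enables one
 * heuristic, e.g. always update in place, do so when SSR allocation is
 * needed, or do so when utilization crosses min_ipu_util.
 */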
static inline bool check_inplace_update_policy(struct inode *inode,
				struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	unsigned int policy = SM_I(sbi)->ipu_policy;

	if (policy & (0x1 << F2FS_IPU_FORCE))
		return true;
	if (policy & (0x1 << F2FS_IPU_SSR) && f2fs_need_SSR(sbi))
		return true;
	if (policy & (0x1 << F2FS_IPU_UTIL) &&
			utilization(sbi) > SM_I(sbi)->min_ipu_util)
		return true;
	if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && f2fs_need_SSR(sbi) &&
			utilization(sbi) > SM_I(sbi)->min_ipu_util)
		return true;

	/*
	 * IPU for rewrite async pages
	 */
	if (policy & (0x1 << F2FS_IPU_ASYNC) &&
			fio && fio->op == REQ_OP_WRITE &&
			!(fio->op_flags & REQ_SYNC) &&
			!f2fs_encrypted_inode(inode))
		return true;

	/* this is only set during fdatasync */
	if (policy & (0x1 << F2FS_IPU_FSYNC) &&
			is_inode_flag_set(inode, FI_NEED_IPU))
		return true;

	return false;
}

bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio)
{
	if (f2fs_is_pinned_file(inode))
		return true;

	/* if this is cold file, we should overwrite to avoid fragmentation */
	if (file_is_cold(inode))
		return true;

	return check_inplace_update_policy(inode, fio);
}

bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	if (test_opt(sbi, LFS))
		return true;
	if (S_ISDIR(inode->i_mode))
		return true;
	if (f2fs_is_atomic_file(inode))
		return true;
	if (fio) {
		if (is_cold_data(fio->page))
			return true;
		if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
			return true;
	}
	return false;
}

static inline bool need_inplace_update(struct f2fs_io_info *fio)
{
	struct inode *inode = fio->page->mapping->host;

	if (f2fs_should_update_outplace(inode, fio))
		return false;

	return f2fs_should_update_inplace(inode, fio);
}

int f2fs_do_write_data_page(struct f2fs_io_info *fio)
{
	struct page *page = fio->page;
	struct inode *inode = page->mapping->host;
	struct dnode_of_data dn;
	struct extent_info ei = {0,0,0};
	struct node_info ni;
	bool ipu_force = false;
	int err = 0;

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	if (need_inplace_update(fio) &&
			f2fs_lookup_extent_cache(inode, page->index, &ei)) {
		fio->old_blkaddr = ei.blk + page->index - ei.fofs;

		if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
							DATA_GENERIC))
			return -EFAULT;

		ipu_force = true;
		fio->need_lock = LOCK_DONE;
		goto got_it;
	}

	/* Deadlock due to between page->lock and f2fs_lock_op */
	if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi))
		return -EAGAIN;

	err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
	if (err)
		goto out;

	fio->old_blkaddr = dn.data_blkaddr;

	/* This page is already truncated */
	if (fio->old_blkaddr == NULL_ADDR) {
		ClearPageUptodate(page);
		goto out_writepage;
	}
got_it:
	if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
		!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
							DATA_GENERIC)) {
		err = -EFAULT;
		goto out_writepage;
	}
	/*
	 * If current allocation needs SSR,
	 * it had better use in-place writes for updated data.
	 */
	if (ipu_force || (is_valid_data_blkaddr(fio->sbi, fio->old_blkaddr) &&
					need_inplace_update(fio))) {
		err = encrypt_one_page(fio);
		if (err)
			goto out_writepage;

		set_page_writeback(page);
		ClearPageError(page);
		f2fs_put_dnode(&dn);
		if (fio->need_lock == LOCK_REQ)
			f2fs_unlock_op(fio->sbi);
		err = f2fs_inplace_write_data(fio);
		trace_f2fs_do_write_data_page(fio->page, IPU);
		set_inode_flag(inode, FI_UPDATE_WRITE);
		return err;
	}

	if (fio->need_lock == LOCK_RETRY) {
		if (!f2fs_trylock_op(fio->sbi)) {
			err = -EAGAIN;
			goto out_writepage;
		}
		fio->need_lock = LOCK_REQ;
	}

	err = f2fs_get_node_info(fio->sbi, dn.nid, &ni);
	if (err)
		goto out_writepage;

	fio->version = ni.version;

	err = encrypt_one_page(fio);
	if (err)
		goto out_writepage;

	set_page_writeback(page);
	ClearPageError(page);

	/* LFS mode write path */
	f2fs_outplace_write_data(&dn, fio);
	trace_f2fs_do_write_data_page(page, OPU);
	set_inode_flag(inode, FI_APPEND_WRITE);
	if (page->index == 0)
		set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
out_writepage:
	f2fs_put_dnode(&dn);
out:
	if (fio->need_lock == LOCK_REQ)
		f2fs_unlock_op(fio->sbi);
	return err;
}

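/*
 * Write out a single data page; returns 0 once the page is handled (with
 * *submitted reporting whether a bio was actually issued), or
 * AOP_WRITEPAGE_ACTIVATE to make the VM keep the redirtied page.
 */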
static int __write_data_page(struct page *page, bool *submitted,
				struct writeback_control *wbc,
				enum iostat_type io_type)
{
	struct inode *inode = page->mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	loff_t i_size = i_size_read(inode);
	const pgoff_t end_index = ((unsigned long long) i_size)
							>> PAGE_SHIFT;
	loff_t psize = (page->index + 1) << PAGE_SHIFT;
	unsigned offset = 0;
	bool need_balance_fs = false;
	int err = 0;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.ino = inode->i_ino,
		.type = DATA,
		.op = REQ_OP_WRITE,
		.op_flags = wbc_to_write_flags(wbc),
		.old_blkaddr = NULL_ADDR,
		.page = page,
		.encrypted_page = NULL,
		.submitted = false,
		.need_lock = LOCK_RETRY,
		.io_type = io_type,
		.io_wbc = wbc,
	};

	trace_f2fs_writepage(page, DATA);

	/* we should bypass data pages to proceed the kworker jobs */
	if (unlikely(f2fs_cp_error(sbi))) {
		mapping_set_error(page->mapping, -EIO);
		/*
		 * don't drop any dirty dentry pages for keeping latest
		 * directory structure.
		 */
		if (S_ISDIR(inode->i_mode))
			goto redirty_out;
		goto out;
	}

	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto redirty_out;

	if (page->index < end_index)
		goto write;

	/*
	 * If the offset is out-of-range of file size,
	 * this page does not have to be written to disk.
	 */
	offset = i_size & (PAGE_SIZE - 1);
	if ((page->index >= end_index + 1) || !offset)
		goto out;

	zero_user_segment(page, offset, PAGE_SIZE);
write:
	if (f2fs_is_drop_cache(inode))
		goto out;
	/* we should not write 0'th page having journal header */
	if (f2fs_is_volatile_file(inode) && (!page->index ||
			(!wbc->for_reclaim &&
			f2fs_available_free_memory(sbi, BASE_CHECK))))
		goto redirty_out;

	/* Dentry blocks are controlled by checkpoint */
	if (S_ISDIR(inode->i_mode)) {
		fio.need_lock = LOCK_DONE;
		err = f2fs_do_write_data_page(&fio);
		goto done;
	}

	if (!wbc->for_reclaim)
		need_balance_fs = true;
	else if (has_not_enough_free_secs(sbi, 0, 0))
		goto redirty_out;
	else
		set_inode_flag(inode, FI_HOT_DATA);

	err = -EAGAIN;
	if (f2fs_has_inline_data(inode)) {
		err = f2fs_write_inline_data(inode, page);
		if (!err)
			goto out;
	}

	if (err == -EAGAIN) {
		err = f2fs_do_write_data_page(&fio);
		if (err == -EAGAIN) {
			fio.need_lock = LOCK_REQ;
			err = f2fs_do_write_data_page(&fio);
		}
	}

	if (err) {
		file_set_keep_isize(inode);
	} else {
		down_write(&F2FS_I(inode)->i_sem);
		if (F2FS_I(inode)->last_disk_size < psize)
			F2FS_I(inode)->last_disk_size = psize;
		up_write(&F2FS_I(inode)->i_sem);
	}

done:
	if (err && err != -ENOENT)
		goto redirty_out;

out:
	inode_dec_dirty_pages(inode);
	if (err)
		ClearPageUptodate(page);

	if (wbc->for_reclaim) {
		f2fs_submit_merged_write_cond(sbi, inode, 0, page->index, DATA);
		clear_inode_flag(inode, FI_HOT_DATA);
		f2fs_remove_dirty_inode(inode);
		submitted = NULL;
	}

	unlock_page(page);
	if (!S_ISDIR(inode->i_mode))
		f2fs_balance_fs(sbi, need_balance_fs);

	if (unlikely(f2fs_cp_error(sbi))) {
		f2fs_submit_merged_write(sbi, DATA);
		submitted = NULL;
	}

	if (submitted)
		*submitted = fio.submitted;

	return 0;

redirty_out:
	redirty_page_for_writepage(wbc, page);
	/*
	 * pageout() in MM translates EAGAIN, so calls handle_write_error()
	 * -> mapping_set_error() -> set_bit(AS_EIO, ...).
	 * file_write_and_wait_range() will see EIO error, which is critical
	 * to return value of fsync() followed by atomic_write failure to user.
	 */
	if (!err || wbc->for_reclaim)
		return AOP_WRITEPAGE_ACTIVATE;
	unlock_page(page);
	return err;
}

static int f2fs_write_data_page(struct page *page,
					struct writeback_control *wbc)
{
	return __write_data_page(page, NULL, wbc, FS_DATA_IO);
}

/*
 * This function was copied from write_cache_pages from mm/page-writeback.c.
 * The major change is that it writes cold data pages separately from
 * warm/hot data pages.
 */
static int f2fs_write_cache_pages(struct address_space *mapping,
					struct writeback_control *wbc,
					enum iostat_type io_type)
{
	int ret = 0;
	int done = 0;
	struct pagevec pvec;
	struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
	int nr_pages;
	pgoff_t uninitialized_var(writeback_index);
	pgoff_t index;
	pgoff_t end;		/* Inclusive */
	pgoff_t done_index;
	pgoff_t last_idx = ULONG_MAX;
	int cycled;
	int range_whole = 0;
	xa_mark_t tag;

	pagevec_init(&pvec);

	if (get_dirty_pages(mapping->host) <=
				SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
		set_inode_flag(mapping->host, FI_HOT_DATA);
	else
		clear_inode_flag(mapping->host, FI_HOT_DATA);

	if (wbc->range_cyclic) {
		writeback_index = mapping->writeback_index; /* prev offset */
		index = writeback_index;
		if (index == 0)
			cycled = 1;
		else
			cycled = 0;
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_SHIFT;
		end = wbc->range_end >> PAGE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
		cycled = 1; /* ignore range_cyclic tests */
	}
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag = PAGECACHE_TAG_TOWRITE;
	else
		tag = PAGECACHE_TAG_DIRTY;
retry:
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag_pages_for_writeback(mapping, index, end);
	done_index = index;
	while (!done && (index <= end)) {
		int i;

		nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
				tag);
		if (nr_pages == 0)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];
			bool submitted = false;

			/* give priority to WB_SYNC threads */
			if (atomic_read(&sbi->wb_sync_req[DATA]) &&
					wbc->sync_mode == WB_SYNC_NONE) {
				done = 1;
				break;
			}

			done_index = page->index;
retry_write:
			lock_page(page);

			if (unlikely(page->mapping != mapping)) {
continue_unlock:
				unlock_page(page);
				continue;
			}

			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			if (PageWriteback(page)) {
				if (wbc->sync_mode != WB_SYNC_NONE)
					f2fs_wait_on_page_writeback(page,
								DATA, true);
				else
					goto continue_unlock;
			}

			BUG_ON(PageWriteback(page));
			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			ret = __write_data_page(page, &submitted, wbc, io_type);
			if (unlikely(ret)) {
				/*
				 * keep nr_to_write, since vfs uses this to
				 * get # of written pages.
				 */
				if (ret == AOP_WRITEPAGE_ACTIVATE) {
					unlock_page(page);
					ret = 0;
					continue;
				} else if (ret == -EAGAIN) {
					ret = 0;
					if (wbc->sync_mode == WB_SYNC_ALL) {
						cond_resched();
						congestion_wait(BLK_RW_ASYNC,
									HZ/50);
						goto retry_write;
					}
					continue;
				}
				done_index = page->index + 1;
				done = 1;
				break;
			} else if (submitted) {
				last_idx = page->index;
			}

			if (--wbc->nr_to_write <= 0 &&
					wbc->sync_mode == WB_SYNC_NONE) {
				done = 1;
				break;
			}
		}
		pagevec_release(&pvec);
		cond_resched();
	}

	if (!cycled && !done) {
		cycled = 1;
		index = 0;
		end = writeback_index - 1;
		goto retry;
	}
	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = done_index;

	if (last_idx != ULONG_MAX)
		f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host,
						0, last_idx, DATA);

	return ret;
}

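/*
 * Serialize writepages under sbi->writepages only for regular-file data,
 * and only when writeback is not WB_SYNC_ALL or the inode has at least
 * min_seq_blocks dirty pages queued, i.e. when out-of-order submission
 * would likely break up long sequential writes.
 */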
static inline bool __should_serialize_io(struct inode *inode,
					struct writeback_control *wbc)
{
	if (!S_ISREG(inode->i_mode))
		return false;
	if (wbc->sync_mode != WB_SYNC_ALL)
		return true;
	if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
		return true;
	return false;
}

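/*
 * Common writepages path: apply the skip heuristics, keep WB_SYNC_ALL
 * and WB_SYNC_NONE writeback from interleaving via wb_sync_req, take
 * sbi->writepages when __should_serialize_io() asks for it, and plug
 * the block layer around f2fs_write_cache_pages().
 */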
static int __f2fs_write_data_pages(struct address_space *mapping,
						struct writeback_control *wbc,
						enum iostat_type io_type)
{
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct blk_plug plug;
	int ret;
	bool locked = false;

	/* deal with chardevs and other special files */
	if (!mapping->a_ops->writepage)
		return 0;

	/* skip writing if there is no dirty page in this inode */
	if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE)
		return 0;

	/* during POR, we don't need to trigger writepage at all. */
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto skip_write;

	if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
			get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
			f2fs_available_free_memory(sbi, DIRTY_DENTS))
		goto skip_write;

	/* skip writing during file defragment */
	if (is_inode_flag_set(inode, FI_DO_DEFRAG))
		goto skip_write;

	trace_f2fs_writepages(mapping->host, wbc, DATA);

	/* to avoid splitting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
	if (wbc->sync_mode == WB_SYNC_ALL)
		atomic_inc(&sbi->wb_sync_req[DATA]);
	else if (atomic_read(&sbi->wb_sync_req[DATA]))
		goto skip_write;

	if (__should_serialize_io(inode, wbc)) {
		mutex_lock(&sbi->writepages);
		locked = true;
	}

	blk_start_plug(&plug);
	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
	blk_finish_plug(&plug);

	if (locked)
		mutex_unlock(&sbi->writepages);

	if (wbc->sync_mode == WB_SYNC_ALL)
		atomic_dec(&sbi->wb_sync_req[DATA]);
	/*
	 * if some pages were truncated, we cannot guarantee that its
	 * mapping->host can detect pending bios.
	 */

	f2fs_remove_dirty_inode(inode);
	return ret;

skip_write:
	wbc->pages_skipped += get_dirty_pages(inode);
	trace_f2fs_writepages(mapping->host, wbc, DATA);
	return 0;
}

static int f2fs_write_data_pages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;

	return __f2fs_write_data_pages(mapping, wbc,
			F2FS_I(inode)->cp_task == current ?
			FS_CP_DATA_IO : FS_DATA_IO);
}

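/*
 * A buffered or direct write failed: roll back any page cache and
 * blocks instantiated beyond i_size, holding i_gc_rwsem[WRITE] and
 * i_mmap_sem to keep GC and mmap faults out of the truncation window.
 */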
static void f2fs_write_failed(struct address_space *mapping, loff_t to)
{
	struct inode *inode = mapping->host;
	loff_t i_size = i_size_read(inode);

	if (to > i_size) {
		down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
		down_write(&F2FS_I(inode)->i_mmap_sem);

		truncate_pagecache(inode, i_size);
		f2fs_truncate_blocks(inode, i_size, true);

		up_write(&F2FS_I(inode)->i_mmap_sem);
		up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	}
}

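/*
 * Resolve the on-disk block address backing @page ahead of a buffered
 * write: read or convert inline data as needed, allocate a block when
 * the write lands in a hole, and take the PRE_AIO map lock only when
 * inline data or a beyond-EOF write makes allocation possible.
 */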
static int prepare_write_begin(struct f2fs_sb_info *sbi,
			struct page *page, loff_t pos, unsigned len,
			block_t *blk_addr, bool *node_changed)
{
	struct inode *inode = page->mapping->host;
	pgoff_t index = page->index;
	struct dnode_of_data dn;
	struct page *ipage;
	bool locked = false;
	struct extent_info ei = {0,0,0};
	int err = 0;

	/*
	 * we already allocated all the blocks, so we don't need to get
	 * the block addresses when there is no need to fill the page.
	 */
	if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE &&
			!is_inode_flag_set(inode, FI_NO_PREALLOC))
		return 0;

	if (f2fs_has_inline_data(inode) ||
			(pos & PAGE_MASK) >= i_size_read(inode)) {
		__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
		locked = true;
	}
restart:
	/* check inline_data */
	ipage = f2fs_get_node_page(sbi, inode->i_ino);
	if (IS_ERR(ipage)) {
		err = PTR_ERR(ipage);
		goto unlock_out;
	}

	set_new_dnode(&dn, inode, ipage, ipage, 0);

	if (f2fs_has_inline_data(inode)) {
		if (pos + len <= MAX_INLINE_DATA(inode)) {
			f2fs_do_read_inline_data(page, ipage);
			set_inode_flag(inode, FI_DATA_EXIST);
			if (inode->i_nlink)
				set_inline_node(ipage);
		} else {
			err = f2fs_convert_inline_page(&dn, page);
			if (err)
				goto out;
			if (dn.data_blkaddr == NULL_ADDR)
				err = f2fs_get_block(&dn, index);
		}
	} else if (locked) {
		err = f2fs_get_block(&dn, index);
	} else {
		if (f2fs_lookup_extent_cache(inode, index, &ei)) {
			dn.data_blkaddr = ei.blk + index - ei.fofs;
		} else {
			/* hole case */
			err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
			if (err || dn.data_blkaddr == NULL_ADDR) {
				f2fs_put_dnode(&dn);
				__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
								true);
				locked = true;
				goto restart;
			}
		}
	}

	/* convert_inline_page can make node_changed */
	*blk_addr = dn.data_blkaddr;
	*node_changed = dn.node_changed;
out:
	f2fs_put_dnode(&dn);
unlock_out:
	if (locked)
		__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
	return err;
}

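/*
 * .write_begin: pin and prepare the target page.  Depending on how much
 * of the page the write covers, it is left alone, zeroed, or read in
 * from its block address; atomic-file writers get -ENOMEM once too many
 * in-memory pages are outstanding.
 */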
static int f2fs_write_begin(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned flags,
		struct page **pagep, void **fsdata)
{
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct page *page = NULL;
	pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT;
	bool need_balance = false, drop_atomic = false;
	block_t blkaddr = NULL_ADDR;
	int err = 0;

	trace_f2fs_write_begin(inode, pos, len, flags);

	if ((f2fs_is_atomic_file(inode) &&
			!f2fs_available_free_memory(sbi, INMEM_PAGES)) ||
			is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) {
		err = -ENOMEM;
		drop_atomic = true;
		goto fail;
	}

	/*
	 * We should check this at this moment to avoid deadlock on inode page
	 * and #0 page. The locking rule for inline_data conversion should be:
	 * lock_page(page #0) -> lock_page(inode_page)
	 */
	if (index != 0) {
		err = f2fs_convert_inline_inode(inode);
		if (err)
			goto fail;
	}
repeat:
	/*
	 * Do not use grab_cache_page_write_begin() to avoid deadlock due to
	 * wait_for_stable_page. We will wait on it below with our IO control.
	 */
	page = f2fs_pagecache_get_page(mapping, index,
				FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS);
	if (!page) {
		err = -ENOMEM;
		goto fail;
	}

	*pagep = page;

	err = prepare_write_begin(sbi, page, pos, len,
					&blkaddr, &need_balance);
	if (err)
		goto fail;

	if (need_balance && has_not_enough_free_secs(sbi, 0, 0)) {
		unlock_page(page);
		f2fs_balance_fs(sbi, true);
		lock_page(page);
		if (page->mapping != mapping) {
			/* The page got truncated from under us */
			f2fs_put_page(page, 1);
			goto repeat;
		}
	}

	f2fs_wait_on_page_writeback(page, DATA, false);

	/* wait for GCed page writeback via META_MAPPING */
	if (f2fs_post_read_required(inode))
		f2fs_wait_on_block_writeback(sbi, blkaddr);

	if (len == PAGE_SIZE || PageUptodate(page))
		return 0;

	if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode)) {
		zero_user_segment(page, len, PAGE_SIZE);
		return 0;
	}

	if (blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_SIZE);
		SetPageUptodate(page);
	} else {
		err = f2fs_submit_page_read(inode, page, blkaddr);
		if (err)
			goto fail;

		lock_page(page);
		if (unlikely(page->mapping != mapping)) {
			f2fs_put_page(page, 1);
			goto repeat;
		}
		if (unlikely(!PageUptodate(page))) {
			err = -EIO;
			goto fail;
		}
	}
	return 0;

fail:
	f2fs_put_page(page, 1);
	f2fs_write_failed(mapping, pos + len);
	if (drop_atomic)
		f2fs_drop_inmem_pages_all(sbi, false);
	return err;
}

static int f2fs_write_end(struct file *file,
			struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	struct inode *inode = page->mapping->host;

	trace_f2fs_write_end(inode, pos, len, copied);

	/*
	 * This should come from len == PAGE_SIZE, and we expect copied
	 * to be PAGE_SIZE. Otherwise, we treat it with zero copied and
	 * let generic_perform_write() try to copy data again through copied=0.
	 */
	if (!PageUptodate(page)) {
		if (unlikely(copied != len))
			copied = 0;
		else
			SetPageUptodate(page);
	}
	if (!copied)
		goto unlock_out;

	set_page_dirty(page);

	if (pos + copied > i_size_read(inode))
		f2fs_i_size_write(inode, pos + copied);
unlock_out:
	f2fs_put_page(page, 1);
	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
	return copied;
}

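/*
 * Returns 0 when the DIO request is aligned to the filesystem block
 * size, 1 when it is misaligned there but still aligned to the device's
 * logical block size (the caller then falls back to buffered IO), and
 * -EINVAL when it is not even that.
 */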
static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
			   loff_t offset)
{
	unsigned i_blkbits = READ_ONCE(inode->i_blkbits);
	unsigned blkbits = i_blkbits;
	unsigned blocksize_mask = (1 << blkbits) - 1;
	unsigned long align = offset | iov_iter_alignment(iter);
	struct block_device *bdev = inode->i_sb->s_bdev;

	if (align & blocksize_mask) {
		if (bdev)
			blkbits = blksize_bits(bdev_logical_block_size(bdev));
		blocksize_mask = (1 << blkbits) - 1;
		if (align & blocksize_mask)
			return -EINVAL;
		return 1;
	}
	return 0;
}

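/*
 * .direct_IO: hand aligned requests to blockdev_direct_IO() under
 * i_gc_rwsem so GC cannot migrate the blocks underneath the IO; 0 is
 * returned instead for inodes that must use buffered IO.
 */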
static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
	struct address_space *mapping = iocb->ki_filp->f_mapping;
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	size_t count = iov_iter_count(iter);
	loff_t offset = iocb->ki_pos;
	int rw = iov_iter_rw(iter);
	int err;
	enum rw_hint hint = iocb->ki_hint;
	int whint_mode = F2FS_OPTION(sbi).whint_mode;

	err = check_direct_IO(inode, iter, offset);
	if (err)
		return err < 0 ? err : 0;

	if (f2fs_force_buffered_io(inode, rw))
		return 0;

	trace_f2fs_direct_IO_enter(inode, offset, count, rw);

	if (rw == WRITE && whint_mode == WHINT_MODE_OFF)
		iocb->ki_hint = WRITE_LIFE_NOT_SET;

	if (!down_read_trylock(&F2FS_I(inode)->i_gc_rwsem[rw])) {
		if (iocb->ki_flags & IOCB_NOWAIT) {
			iocb->ki_hint = hint;
			err = -EAGAIN;
			goto out;
		}
		down_read(&F2FS_I(inode)->i_gc_rwsem[rw]);
	}

	err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio);
	up_read(&F2FS_I(inode)->i_gc_rwsem[rw]);

	if (rw == WRITE) {
		if (whint_mode == WHINT_MODE_OFF)
			iocb->ki_hint = hint;
		if (err > 0) {
			f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
									err);
			set_inode_flag(inode, FI_UPDATE_WRITE);
		} else if (err < 0) {
			f2fs_write_failed(mapping, offset + count);
		}
	}

out:
	trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);

	return err;
}

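/*
 * .invalidatepage: ignore partial-page invalidation except for meta and
 * node mappings, roll back the dirty-page accounting, and route
 * atomic-written pages through f2fs_drop_inmem_page().
 */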
void f2fs_invalidate_page(struct page *page, unsigned int offset,
							unsigned int length)
{
	struct inode *inode = page->mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
		(offset % PAGE_SIZE || length != PAGE_SIZE))
		return;

	if (PageDirty(page)) {
		if (inode->i_ino == F2FS_META_INO(sbi)) {
			dec_page_count(sbi, F2FS_DIRTY_META);
		} else if (inode->i_ino == F2FS_NODE_INO(sbi)) {
			dec_page_count(sbi, F2FS_DIRTY_NODES);
		} else {
			inode_dec_dirty_pages(inode);
			f2fs_remove_dirty_inode(inode);
		}
	}

	/* This is an atomic-written page; keep it Private */
	if (IS_ATOMIC_WRITTEN_PAGE(page))
		return f2fs_drop_inmem_page(inode, page);

	set_page_private(page, 0);
	ClearPagePrivate(page);
}

int f2fs_release_page(struct page *page, gfp_t wait)
{
	/* If this is a dirty page, keep PagePrivate */
	if (PageDirty(page))
		return 0;

	/* This is an atomic-written page; keep it Private */
	if (IS_ATOMIC_WRITTEN_PAGE(page))
		return 0;

	set_page_private(page, 0);
	ClearPagePrivate(page);
	return 1;
}

static int f2fs_set_data_page_dirty(struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct inode *inode = mapping->host;

	trace_f2fs_set_page_dirty(page, DATA);

	if (!PageUptodate(page))
		SetPageUptodate(page);

	/* don't retain the PG_checked flag which was set during GC */
	if (is_cold_data(page))
		clear_cold_data(page);

	if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) {
		if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
			f2fs_register_inmem_page(inode, page);
			return 1;
		}
		/*
		 * This page has been registered previously; just
		 * return here.
		 */
		return 0;
	}

	if (!PageDirty(page)) {
		__set_page_dirty_nobuffers(page);
		f2fs_update_dirty_page(inode, page);
		return 1;
	}
	return 0;
}

static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
{
	struct inode *inode = mapping->host;

	if (f2fs_has_inline_data(inode))
		return 0;

	/* make sure allocating whole blocks */
	if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
		filemap_write_and_wait(mapping);

	return generic_block_bmap(mapping, block, get_data_block_bmap);
}

#ifdef CONFIG_MIGRATION
#include <linux/migrate.h>

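/*
 * Migrate a data page onto @newpage, compensating for f2fs's private
 * PagePrivate reference convention and, for atomic-written pages,
 * repointing the matching inmem_pages entry at the new page under
 * inmem_lock.
 */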
int f2fs_migrate_page(struct address_space *mapping,
		struct page *newpage, struct page *page, enum migrate_mode mode)
{
	int rc, extra_count;
	struct f2fs_inode_info *fi = F2FS_I(mapping->host);
	bool atomic_written = IS_ATOMIC_WRITTEN_PAGE(page);

	BUG_ON(PageWriteback(page));

	/* migrating an atomic written page is safe with the inmem_lock held */
	if (atomic_written) {
		if (mode != MIGRATE_SYNC)
			return -EBUSY;
		if (!mutex_trylock(&fi->inmem_lock))
			return -EAGAIN;
	}

	/*
	 * A reference is expected if PagePrivate is set when the mapping is
	 * moved, but F2FS breaks this rule to maintain dirty page counts
	 * while truncating pages. Adjusting 'extra_count' here makes it work.
	 */
	extra_count = (atomic_written ? 1 : 0) - page_has_private(page);
	rc = migrate_page_move_mapping(mapping, newpage,
				page, NULL, mode, extra_count);
	if (rc != MIGRATEPAGE_SUCCESS) {
		if (atomic_written)
			mutex_unlock(&fi->inmem_lock);
		return rc;
	}

	if (atomic_written) {
		struct inmem_pages *cur;
		list_for_each_entry(cur, &fi->inmem_pages, list)
			if (cur->page == page) {
				cur->page = newpage;
				break;
			}
		mutex_unlock(&fi->inmem_lock);
		put_page(page);
		get_page(newpage);
	}

	if (PagePrivate(page))
		SetPagePrivate(newpage);
	set_page_private(newpage, page_private(page));

	if (mode != MIGRATE_SYNC_NO_COPY)
		migrate_page_copy(newpage, page);
	else
		migrate_page_states(newpage, page);

	return MIGRATEPAGE_SUCCESS;
}
#endif

const struct address_space_operations f2fs_dblock_aops = {
	.readpage	= f2fs_read_data_page,
	.readpages	= f2fs_read_data_pages,
	.writepage	= f2fs_write_data_page,
	.writepages	= f2fs_write_data_pages,
	.write_begin	= f2fs_write_begin,
	.write_end	= f2fs_write_end,
	.set_page_dirty	= f2fs_set_data_page_dirty,
	.invalidatepage	= f2fs_invalidate_page,
	.releasepage	= f2fs_release_page,
	.direct_IO	= f2fs_direct_IO,
	.bmap		= f2fs_bmap,
#ifdef CONFIG_MIGRATION
	.migratepage    = f2fs_migrate_page,
#endif
};

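/*
 * Clear PAGECACHE_TAG_DIRTY for @page in its mapping's radix tree
 * without touching the page's own dirty bit.
 */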
void f2fs_clear_radix_tree_dirty_tag(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	unsigned long flags;

	xa_lock_irqsave(&mapping->i_pages, flags);
	radix_tree_tag_clear(&mapping->i_pages, page_index(page),
						PAGECACHE_TAG_DIRTY);
	xa_unlock_irqrestore(&mapping->i_pages, flags);
}

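/*
 * Set up the slab cache and mempool backing bio post-read contexts;
 * preallocating NUM_PREALLOC_POST_READ_CTXS of them lets reads make
 * progress under memory pressure.
 */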
int __init f2fs_init_post_read_processing(void)
{
	bio_post_read_ctx_cache = KMEM_CACHE(bio_post_read_ctx, 0);
	if (!bio_post_read_ctx_cache)
		goto fail;
	bio_post_read_ctx_pool =
		mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS,
					 bio_post_read_ctx_cache);
	if (!bio_post_read_ctx_pool)
		goto fail_free_cache;
	return 0;

fail_free_cache:
	kmem_cache_destroy(bio_post_read_ctx_cache);
fail:
	return -ENOMEM;
}

void __exit f2fs_destroy_post_read_processing(void)
{
	mempool_destroy(bio_post_read_ctx_pool);
	kmem_cache_destroy(bio_post_read_ctx_cache);
}