/*
 * fs/f2fs/data.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/prefetch.h>
#include <linux/uio.h>
#include <linux/cleancache.h>
#include <linux/sched/signal.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "trace.h"
#include <trace/events/f2fs.h>

#define NUM_PREALLOC_POST_READ_CTXS	128

static struct kmem_cache *bio_post_read_ctx_cache;
static mempool_t *bio_post_read_ctx_pool;

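/*
 * Pages whose writeback must complete before the running checkpoint can
 * finish: meta/node pages, directory data, atomic-file data and cold data
 * (see WB_DATA_TYPE() and the F2FS_WB_CP_DATA counter).
 */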
static bool __is_cp_guaranteed(struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct inode *inode;
	struct f2fs_sb_info *sbi;

	if (!mapping)
		return false;

	inode = mapping->host;
	sbi = F2FS_I_SB(inode);

	if (inode->i_ino == F2FS_META_INO(sbi) ||
			inode->i_ino ==  F2FS_NODE_INO(sbi) ||
			S_ISDIR(inode->i_mode) ||
			(S_ISREG(inode->i_mode) &&
			is_inode_flag_set(inode, FI_ATOMIC_FILE)) ||
			is_cold_data(page))
		return true;
	return false;
}

/* postprocessing steps for read bios */
enum bio_post_read_step {
	STEP_INITIAL = 0,
	STEP_DECRYPT,
};

struct bio_post_read_ctx {
	struct bio *bio;
	struct work_struct work;
	unsigned int cur_step;
	unsigned int enabled_steps;
};
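/*
 * One context is allocated per read bio that needs postprocessing; it is
 * stashed in bio->bi_private and returned to bio_post_read_ctx_pool by
 * __read_end_io().
 */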

static void __read_end_io(struct bio *bio)
{
	struct page *page;
	struct bio_vec *bv;
	int i;

	bio_for_each_segment_all(bv, bio, i) {
		page = bv->bv_page;

		/* PG_error was set if any post_read step failed */
		if (bio->bi_status || PageError(page)) {
			ClearPageUptodate(page);
			SetPageError(page);
		} else {
			SetPageUptodate(page);
		}
		unlock_page(page);
	}
	if (bio->bi_private)
		mempool_free(bio->bi_private, bio_post_read_ctx_pool);
	bio_put(bio);
}

static void bio_post_read_processing(struct bio_post_read_ctx *ctx);

static void decrypt_work(struct work_struct *work)
{
	struct bio_post_read_ctx *ctx =
		container_of(work, struct bio_post_read_ctx, work);

	fscrypt_decrypt_bio(ctx->bio);

	bio_post_read_processing(ctx);
}

static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
{
	switch (++ctx->cur_step) {
	case STEP_DECRYPT:
		if (ctx->enabled_steps & (1 << STEP_DECRYPT)) {
			INIT_WORK(&ctx->work, decrypt_work);
			fscrypt_enqueue_decrypt_work(&ctx->work);
			return;
		}
		ctx->cur_step++;
		/* fall-through */
	default:
		__read_end_io(ctx->bio);
	}
}

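/*
 * A post-read context is only attached when at least one postprocessing
 * step (currently just decryption) is enabled for this bio.
 */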
static bool f2fs_bio_post_read_required(struct bio *bio)
{
	return bio->bi_private && !bio->bi_status;
}

static void f2fs_read_end_io(struct bio *bio)
{
	if (time_to_inject(F2FS_P_SB(bio_first_page_all(bio)), FAULT_IO)) {
		f2fs_show_injection_info(FAULT_IO);
		bio->bi_status = BLK_STS_IOERR;
	}

	if (f2fs_bio_post_read_required(bio)) {
		struct bio_post_read_ctx *ctx = bio->bi_private;

		ctx->cur_step = STEP_INITIAL;
		bio_post_read_processing(ctx);
		return;
	}

	__read_end_io(bio);
}

static void f2fs_write_end_io(struct bio *bio)
{
	struct f2fs_sb_info *sbi = bio->bi_private;
	struct bio_vec *bvec;
	int i;

	bio_for_each_segment_all(bvec, bio, i) {
		struct page *page = bvec->bv_page;
		enum count_type type = WB_DATA_TYPE(page);

		if (IS_DUMMY_WRITTEN_PAGE(page)) {
			set_page_private(page, (unsigned long)NULL);
			ClearPagePrivate(page);
			unlock_page(page);
			mempool_free(page, sbi->write_io_dummy);

			if (unlikely(bio->bi_status))
				f2fs_stop_checkpoint(sbi, true);
			continue;
		}

		fscrypt_pullback_bio_page(&page, true);

		if (unlikely(bio->bi_status)) {
			mapping_set_error(page->mapping, -EIO);
			if (type == F2FS_WB_CP_DATA)
				f2fs_stop_checkpoint(sbi, true);
		}

		f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) &&
					page->index != nid_of_node(page));

		dec_page_count(sbi, type);
		if (f2fs_in_warm_node_list(sbi, page))
			f2fs_del_fsync_node_entry(sbi, page);
		clear_cold_data(page);
		end_page_writeback(page);
	}
	if (!get_pages(sbi, F2FS_WB_CP_DATA) &&
				wq_has_sleeper(&sbi->cp_wait))
		wake_up(&sbi->cp_wait);

	bio_put(bio);
}

/*
 * Return true if pre_bio's bdev is the same as its target device.
 */
struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
				block_t blk_addr, struct bio *bio)
{
	struct block_device *bdev = sbi->sb->s_bdev;
	int i;

	for (i = 0; i < sbi->s_ndevs; i++) {
		if (FDEV(i).start_blk <= blk_addr &&
					FDEV(i).end_blk >= blk_addr) {
			blk_addr -= FDEV(i).start_blk;
			bdev = FDEV(i).bdev;
			break;
		}
	}
	if (bio) {
		bio_set_dev(bio, bdev);
		bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
	}
	return bdev;
}

int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	int i;

	for (i = 0; i < sbi->s_ndevs; i++)
		if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
			return i;
	return 0;
}

static bool __same_bdev(struct f2fs_sb_info *sbi,
				block_t blk_addr, struct bio *bio)
{
	struct block_device *b = f2fs_target_device(sbi, blk_addr, NULL);
	return bio->bi_disk == b->bd_disk && bio->bi_partno == b->bd_partno;
}

/*
 * Low-level block read/write IO operations.
 */
static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
				struct writeback_control *wbc,
				int npages, bool is_read,
				enum page_type type, enum temp_type temp)
{
	struct bio *bio;

	bio = f2fs_bio_alloc(sbi, npages, true);

	f2fs_target_device(sbi, blk_addr, bio);
	if (is_read) {
		bio->bi_end_io = f2fs_read_end_io;
		bio->bi_private = NULL;
	} else {
		bio->bi_end_io = f2fs_write_end_io;
		bio->bi_private = sbi;
		bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, type, temp);
	}
	if (wbc)
		wbc_init_bio(wbc, bio);

	return bio;
}

static inline void __submit_bio(struct f2fs_sb_info *sbi,
				struct bio *bio, enum page_type type)
{
	if (!is_read_io(bio_op(bio))) {
		unsigned int start;

		if (type != DATA && type != NODE)
			goto submit_io;

		if (test_opt(sbi, LFS) && current->plug)
			blk_finish_plug(current->plug);

		start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
		start %= F2FS_IO_SIZE(sbi);

		if (start == 0)
			goto submit_io;

		/* fill dummy pages */
		for (; start < F2FS_IO_SIZE(sbi); start++) {
			struct page *page =
				mempool_alloc(sbi->write_io_dummy,
					GFP_NOIO | __GFP_ZERO | __GFP_NOFAIL);
			f2fs_bug_on(sbi, !page);

			SetPagePrivate(page);
			set_page_private(page, (unsigned long)DUMMY_WRITTEN_PAGE);
			lock_page(page);
			if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
				f2fs_bug_on(sbi, 1);
		}
		/*
		 * In the NODE case, we lose the next block address chain, so we
		 * need to do a checkpoint in f2fs_sync_file.
		 */
		if (type == NODE)
			set_sbi_flag(sbi, SBI_NEED_CP);
	}
submit_io:
	if (is_read_io(bio_op(bio)))
		trace_f2fs_submit_read_bio(sbi->sb, type, bio);
	else
		trace_f2fs_submit_write_bio(sbi->sb, type, bio);
	submit_bio(bio);
}

static void __submit_merged_bio(struct f2fs_bio_info *io)
{
	struct f2fs_io_info *fio = &io->fio;

	if (!io->bio)
		return;

	bio_set_op_attrs(io->bio, fio->op, fio->op_flags);

	if (is_read_io(fio->op))
		trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
	else
		trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio);

	__submit_bio(io->sbi, io->bio, fio->type);
	io->bio = NULL;
}

static bool __has_merged_page(struct f2fs_bio_info *io,
				struct inode *inode, nid_t ino, pgoff_t idx)
{
	struct bio_vec *bvec;
	struct page *target;
	int i;

	if (!io->bio)
		return false;

	if (!inode && !ino)
		return true;

	bio_for_each_segment_all(bvec, io->bio, i) {

		if (bvec->bv_page->mapping)
			target = bvec->bv_page;
		else
			target = fscrypt_control_page(bvec->bv_page);

		if (idx != target->index)
			continue;

		if (inode && inode == target->mapping->host)
			return true;
		if (ino && ino == ino_of_node(target))
			return true;
	}

	return false;
}

static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode,
				nid_t ino, pgoff_t idx, enum page_type type)
{
	enum page_type btype = PAGE_TYPE_OF_BIO(type);
	enum temp_type temp;
	struct f2fs_bio_info *io;
	bool ret = false;

	for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
		io = sbi->write_io[btype] + temp;

		down_read(&io->io_rwsem);
		ret = __has_merged_page(io, inode, ino, idx);
		up_read(&io->io_rwsem);

		/* TODO: use HOT temp only for meta pages now. */
		if (ret || btype == META)
			break;
	}
	return ret;
}

static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
				enum page_type type, enum temp_type temp)
{
	enum page_type btype = PAGE_TYPE_OF_BIO(type);
	struct f2fs_bio_info *io = sbi->write_io[btype] + temp;

	down_write(&io->io_rwsem);

	/* change META to META_FLUSH in the checkpoint procedure */
	if (type >= META_FLUSH) {
		io->fio.type = META_FLUSH;
		io->fio.op = REQ_OP_WRITE;
		io->fio.op_flags = REQ_META | REQ_PRIO | REQ_SYNC;
		if (!test_opt(sbi, NOBARRIER))
			io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
	}
	__submit_merged_bio(io);
	up_write(&io->io_rwsem);
}

static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
				struct inode *inode, nid_t ino, pgoff_t idx,
				enum page_type type, bool force)
{
	enum temp_type temp;

	if (!force && !has_merged_page(sbi, inode, ino, idx, type))
		return;

	for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {

		__f2fs_submit_merged_write(sbi, type, temp);

		/* TODO: use HOT temp only for meta pages now. */
		if (type >= META)
			break;
	}
}

void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
{
	__submit_merged_write_cond(sbi, NULL, 0, 0, type, true);
}

void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
				struct inode *inode, nid_t ino, pgoff_t idx,
				enum page_type type)
{
	__submit_merged_write_cond(sbi, inode, ino, idx, type, false);
}

void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
{
	f2fs_submit_merged_write(sbi, DATA);
	f2fs_submit_merged_write(sbi, NODE);
	f2fs_submit_merged_write(sbi, META);
}

/*
 * Fill the locked page with data located in the block address.
 * A caller needs to unlock the page on failure.
 */
int f2fs_submit_page_bio(struct f2fs_io_info *fio)
{
	struct bio *bio;
	struct page *page = fio->encrypted_page ?
			fio->encrypted_page : fio->page;

	if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
			__is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
		return -EFAULT;

	trace_f2fs_submit_page_bio(page, fio);
	f2fs_trace_ios(fio, 0);

	/* Allocate a new bio */
	bio = __bio_alloc(fio->sbi, fio->new_blkaddr, fio->io_wbc,
				1, is_read_io(fio->op), fio->type, fio->temp);

	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
		bio_put(bio);
		return -EFAULT;
	}
	bio_set_op_attrs(bio, fio->op, fio->op_flags);

	__submit_bio(fio->sbi, bio, fio->type);

	if (!is_read_io(fio->op))
		inc_page_count(fio->sbi, WB_DATA_TYPE(fio->page));
	return 0;
}

void f2fs_submit_page_write(struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
	struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
	struct page *bio_page;

	f2fs_bug_on(sbi, is_read_io(fio->op));

	down_write(&io->io_rwsem);
next:
	if (fio->in_list) {
		spin_lock(&io->io_lock);
		if (list_empty(&io->io_list)) {
			spin_unlock(&io->io_lock);
			goto out;
		}
		fio = list_first_entry(&io->io_list,
						struct f2fs_io_info, list);
		list_del(&fio->list);
		spin_unlock(&io->io_lock);
	}

	if (__is_valid_data_blkaddr(fio->old_blkaddr))
		verify_block_addr(fio, fio->old_blkaddr);
	verify_block_addr(fio, fio->new_blkaddr);

	bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;

	/* set submitted = true as a return value */
	fio->submitted = true;

	inc_page_count(sbi, WB_DATA_TYPE(bio_page));

	if (io->bio && (io->last_block_in_bio != fio->new_blkaddr - 1 ||
	    (io->fio.op != fio->op || io->fio.op_flags != fio->op_flags) ||
			!__same_bdev(sbi, fio->new_blkaddr, io->bio)))
		__submit_merged_bio(io);
alloc_new:
	if (io->bio == NULL) {
		if ((fio->type == DATA || fio->type == NODE) &&
				fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
			dec_page_count(sbi, WB_DATA_TYPE(bio_page));
			fio->retry = true;
			goto skip;
		}
		io->bio = __bio_alloc(sbi, fio->new_blkaddr, fio->io_wbc,
						BIO_MAX_PAGES, false,
						fio->type, fio->temp);
		io->fio = *fio;
	}

	if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) {
		__submit_merged_bio(io);
		goto alloc_new;
	}

	if (fio->io_wbc)
		wbc_account_io(fio->io_wbc, bio_page, PAGE_SIZE);

	io->last_block_in_bio = fio->new_blkaddr;
	f2fs_trace_ios(fio, 0);

	trace_f2fs_submit_page_write(fio->page, fio);
skip:
	if (fio->in_list)
		goto next;
out:
	up_write(&io->io_rwsem);
}

static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
					unsigned nr_pages, unsigned op_flag)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct bio *bio;
	struct bio_post_read_ctx *ctx;
	unsigned int post_read_steps = 0;

	if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC))
		return ERR_PTR(-EFAULT);

	bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false);
	if (!bio)
		return ERR_PTR(-ENOMEM);
	f2fs_target_device(sbi, blkaddr, bio);
	bio->bi_end_io = f2fs_read_end_io;
	bio_set_op_attrs(bio, REQ_OP_READ, op_flag);

	if (f2fs_encrypted_file(inode))
		post_read_steps |= 1 << STEP_DECRYPT;
	if (post_read_steps) {
		ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
		if (!ctx) {
			bio_put(bio);
			return ERR_PTR(-ENOMEM);
		}
		ctx->bio = bio;
		ctx->enabled_steps = post_read_steps;
		bio->bi_private = ctx;

		/* wait for the page to be moved by cleaning */
		f2fs_wait_on_block_writeback(sbi, blkaddr);
	}

	return bio;
}

/* This can handle encrypted pages as well */
static int f2fs_submit_page_read(struct inode *inode, struct page *page,
							block_t blkaddr)
{
	struct bio *bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0);

	if (IS_ERR(bio))
		return PTR_ERR(bio);

	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
		bio_put(bio);
		return -EFAULT;
	}
	__submit_bio(F2FS_I_SB(inode), bio, DATA);
	return 0;
}

593 594 595 596
static void __set_data_blkaddr(struct dnode_of_data *dn)
{
	struct f2fs_node *rn = F2FS_NODE(dn->node_page);
	__le32 *addr_array;
597 598 599 600
	int base = 0;

	if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode))
		base = get_extra_isize(dn->inode);
601 602 603

	/* Get physical address of data block */
	addr_array = blkaddr_in_node(rn);
604
	addr_array[base + dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
605 606
}

J
Jaegeuk Kim 已提交
607
/*
608 609 610 611 612
 * Lock ordering for the change of data block address:
 * ->data_page
 *  ->node_page
 *    update block addresses in the node page
 */
C
Chao Yu 已提交
613
void f2fs_set_data_blkaddr(struct dnode_of_data *dn)
614
{
615 616 617
	f2fs_wait_on_page_writeback(dn->node_page, NODE, true);
	__set_data_blkaddr(dn);
	if (set_page_dirty(dn->node_page))
618
		dn->node_changed = true;
619 620
}

621 622 623
void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
{
	dn->data_blkaddr = blkaddr;
C
Chao Yu 已提交
624
	f2fs_set_data_blkaddr(dn);
625 626 627
	f2fs_update_extent_cache(dn);
}

628
/* dn->ofs_in_node will be returned with up-to-date last block pointer */
C
Chao Yu 已提交
629
int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
630
{
631
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
C
Chao Yu 已提交
632
	int err;
633

634 635 636
	if (!count)
		return 0;

637
	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
638
		return -EPERM;
C
Chao Yu 已提交
639 640
	if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
		return err;
641

642 643 644 645 646 647
	trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
						dn->ofs_in_node, count);

	f2fs_wait_on_page_writeback(dn->node_page, NODE, true);

	for (; count > 0; dn->ofs_in_node++) {
648 649
		block_t blkaddr = datablock_addr(dn->inode,
					dn->node_page, dn->ofs_in_node);
650 651 652 653 654 655 656 657 658
		if (blkaddr == NULL_ADDR) {
			dn->data_blkaddr = NEW_ADDR;
			__set_data_blkaddr(dn);
			count--;
		}
	}

	if (set_page_dirty(dn->node_page))
		dn->node_changed = true;
659 660 661
	return 0;
}

662
/* Should keep dn->ofs_in_node unchanged */
C
Chao Yu 已提交
663
int f2fs_reserve_new_block(struct dnode_of_data *dn)
664 665 666 667
{
	unsigned int ofs_in_node = dn->ofs_in_node;
	int ret;

C
Chao Yu 已提交
668
	ret = f2fs_reserve_new_blocks(dn, 1);
669 670 671 672
	dn->ofs_in_node = ofs_in_node;
	return ret;
}

673 674 675 676 677
int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
{
	bool need_put = dn->inode_page ? false : true;
	int err;

C
Chao Yu 已提交
678
	err = f2fs_get_dnode_of_data(dn, index, ALLOC_NODE);
679 680
	if (err)
		return err;
681

682
	if (dn->data_blkaddr == NULL_ADDR)
C
Chao Yu 已提交
683
		err = f2fs_reserve_new_block(dn);
684
	if (err || need_put)
685 686 687 688
		f2fs_put_dnode(dn);
	return err;
}

689
int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
690
{
691
	struct extent_info ei  = {0,0,0};
692
	struct inode *inode = dn->inode;
693

694 695 696
	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
		dn->data_blkaddr = ei.blk + index - ei.fofs;
		return 0;
697
	}
698

699
	return f2fs_reserve_block(dn, index);
700 701
}

C
Chao Yu 已提交
702
struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
M
Mike Christie 已提交
703
						int op_flags, bool for_write)
704 705 706 707
{
	struct address_space *mapping = inode->i_mapping;
	struct dnode_of_data dn;
	struct page *page;
708
	struct extent_info ei = {0,0,0};
709
	int err;
710

711
	page = f2fs_grab_cache_page(mapping, index, for_write);
712 713 714
	if (!page)
		return ERR_PTR(-ENOMEM);

C
Chao Yu 已提交
715 716 717 718 719
	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
		dn.data_blkaddr = ei.blk + index - ei.fofs;
		goto got_it;
	}

720
	set_new_dnode(&dn, inode, NULL, NULL, 0);
C
Chao Yu 已提交
721
	err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
722 723
	if (err)
		goto put_err;
724 725
	f2fs_put_dnode(&dn);

726
	if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
727 728
		err = -ENOENT;
		goto put_err;
729
	}
C
Chao Yu 已提交
730
got_it:
731 732
	if (PageUptodate(page)) {
		unlock_page(page);
733
		return page;
734
	}
735

	/*
	 * A new dentry page is allocated but cannot be written, since its
	 * new inode page couldn't be allocated due to -ENOSPC.
	 * In such a case, its blkaddr can remain NEW_ADDR.
	 * see, f2fs_add_link -> f2fs_get_new_data_page ->
	 * f2fs_init_inode_metadata.
	 */
	if (dn.data_blkaddr == NEW_ADDR) {
744
		zero_user_segment(page, 0, PAGE_SIZE);
745 746
		if (!PageUptodate(page))
			SetPageUptodate(page);
747
		unlock_page(page);
J
Jaegeuk Kim 已提交
748 749
		return page;
	}
750

751
	err = f2fs_submit_page_read(inode, page, dn.data_blkaddr);
752
	if (err)
753
		goto put_err;
754
	return page;
755 756 757 758

put_err:
	f2fs_put_page(page, 1);
	return ERR_PTR(err);
759 760
}

C
Chao Yu 已提交
761
struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index)
762 763 764 765 766 767 768 769 770
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;

	page = find_get_page(mapping, index);
	if (page && PageUptodate(page))
		return page;
	f2fs_put_page(page, 0);

C
Chao Yu 已提交
771
	page = f2fs_get_read_data_page(inode, index, 0, false);
772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790
	if (IS_ERR(page))
		return page;

	if (PageUptodate(page))
		return page;

	wait_on_page_locked(page);
	if (unlikely(!PageUptodate(page))) {
		f2fs_put_page(page, 0);
		return ERR_PTR(-EIO);
	}
	return page;
}

/*
 * If it tries to access a hole, return an error, because the callers
 * (functions in dir.c and GC) need to know whether the page exists or not.
 */
struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
792
							bool for_write)
793 794 795 796
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
repeat:
C
Chao Yu 已提交
797
	page = f2fs_get_read_data_page(inode, index, 0, for_write);
798 799
	if (IS_ERR(page))
		return page;
800

801
	/* wait for read completion */
802
	lock_page(page);
803
	if (unlikely(page->mapping != mapping)) {
804 805
		f2fs_put_page(page, 1);
		goto repeat;
806
	}
807 808 809 810
	if (unlikely(!PageUptodate(page))) {
		f2fs_put_page(page, 1);
		return ERR_PTR(-EIO);
	}
811 812 813
	return page;
}

/*
 * Caller ensures that this data page is never allocated.
 * A new zero-filled data page is allocated in the page cache.
 *
 * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
 * f2fs_unlock_op().
 * Note that ipage is set only by make_empty_dir, and if any error occurs,
 * ipage should be released by this function.
 */
struct page *f2fs_get_new_data_page(struct inode *inode,
824
		struct page *ipage, pgoff_t index, bool new_i_size)
825 826 827 828 829
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
	struct dnode_of_data dn;
	int err;
830

831
	page = f2fs_grab_cache_page(mapping, index, true);
832 833 834 835 836 837
	if (!page) {
		/*
		 * before exiting, we should make sure ipage will be released
		 * if any error occurs.
		 */
		f2fs_put_page(ipage, 1);
838
		return ERR_PTR(-ENOMEM);
839
	}
840

841
	set_new_dnode(&dn, inode, ipage, NULL, 0);
842
	err = f2fs_reserve_block(&dn, index);
843 844
	if (err) {
		f2fs_put_page(page, 1);
845
		return ERR_PTR(err);
846
	}
847 848
	if (!ipage)
		f2fs_put_dnode(&dn);
849 850

	if (PageUptodate(page))
851
		goto got_it;
852 853

	if (dn.data_blkaddr == NEW_ADDR) {
854
		zero_user_segment(page, 0, PAGE_SIZE);
855 856
		if (!PageUptodate(page))
			SetPageUptodate(page);
857
	} else {
858
		f2fs_put_page(page, 1);
859

860 861
		/* if ipage exists, blkaddr should be NEW_ADDR */
		f2fs_bug_on(F2FS_I_SB(inode), ipage);
C
Chao Yu 已提交
862
		page = f2fs_get_lock_data_page(inode, index, true);
863
		if (IS_ERR(page))
864
			return page;
865
	}
866
got_it:
C
Chao Yu 已提交
867
	if (new_i_size && i_size_read(inode) <
868
				((loff_t)(index + 1) << PAGE_SHIFT))
869
		f2fs_i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT));
870 871 872
	return page;
}

873
static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
874
{
875
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
876 877
	struct f2fs_summary sum;
	struct node_info ni;
878
	pgoff_t fofs;
879
	blkcnt_t count = 1;
C
Chao Yu 已提交
880
	int err;
881

882
	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
883
		return -EPERM;
884

885 886 887 888
	err = f2fs_get_node_info(sbi, dn->nid, &ni);
	if (err)
		return err;

889 890
	dn->data_blkaddr = datablock_addr(dn->inode,
				dn->node_page, dn->ofs_in_node);
891 892 893
	if (dn->data_blkaddr == NEW_ADDR)
		goto alloc;

C
Chao Yu 已提交
894 895
	if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
		return err;
896

897
alloc:
898 899
	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);

C
Chao Yu 已提交
900
	f2fs_allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
901
					&sum, seg_type, NULL, false);
C
Chao Yu 已提交
902
	f2fs_set_data_blkaddr(dn);
903

904
	/* update i_size */
C
Chao Yu 已提交
905
	fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
906
							dn->ofs_in_node;
907
	if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_SHIFT))
908
		f2fs_i_size_write(dn->inode,
909
				((loff_t)(fofs + 1) << PAGE_SHIFT));
910 911 912
	return 0;
}

913
int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
914
{
915
	struct inode *inode = file_inode(iocb->ki_filp);
C
Chao Yu 已提交
916
	struct f2fs_map_blocks map;
917
	int flag;
918
	int err = 0;
919
	bool direct_io = iocb->ki_flags & IOCB_DIRECT;
920

	/* convert inline data for Direct I/O */
	if (direct_io) {
923 924 925 926 927
		err = f2fs_convert_inline_inode(inode);
		if (err)
			return err;
	}

928 929 930
	if (is_inode_flag_set(inode, FI_NO_PREALLOC))
		return 0;

931
	map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
932 933 934 935 936 937
	map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from));
	if (map.m_len > map.m_lblk)
		map.m_len -= map.m_lblk;
	else
		map.m_len = 0;

938
	map.m_next_pgofs = NULL;
939
	map.m_next_extent = NULL;
940
	map.m_seg_type = NO_CHECK_TYPE;
941

942
	if (direct_io) {
C
Chao Yu 已提交
943
		map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint);
H
Hyunchul Lee 已提交
944
		flag = f2fs_force_buffered_io(inode, WRITE) ?
945 946 947
					F2FS_GET_BLOCK_PRE_AIO :
					F2FS_GET_BLOCK_PRE_DIO;
		goto map_blocks;
948
	}
C
Chao Yu 已提交
949
	if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) {
950 951 952
		err = f2fs_convert_inline_inode(inode);
		if (err)
			return err;
953
	}
954
	if (f2fs_has_inline_data(inode))
955
		return err;
956 957 958 959 960 961 962 963 964

	flag = F2FS_GET_BLOCK_PRE_AIO;

map_blocks:
	err = f2fs_map_blocks(inode, &map, 1, flag);
	if (map.m_len > 0 && err == -ENOSPC) {
		if (!direct_io)
			set_inode_flag(inode, FI_NO_PREALLOC);
		err = 0;
965
	}
966
	return err;
967 968
}

969 970 971 972 973 974 975 976 977 978 979 980 981 982 983
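/*
 * F2FS_GET_BLOCK_PRE_AIO only needs to exclude concurrent node page updates
 * (sbi->node_change); the other mapping flags take the full f2fs_lock_op().
 */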
static inline void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
{
	if (flag == F2FS_GET_BLOCK_PRE_AIO) {
		if (lock)
			down_read(&sbi->node_change);
		else
			up_read(&sbi->node_change);
	} else {
		if (lock)
			f2fs_lock_op(sbi);
		else
			f2fs_unlock_op(sbi);
	}
}

J
Jaegeuk Kim 已提交
984
/*
J
Jaegeuk Kim 已提交
985 986
 * f2fs_map_blocks() now supported readahead/bmap/rw direct_IO with
 * f2fs_map_blocks structure.
C
Chao Yu 已提交
987 988 989 990 991
 * If original data blocks are allocated, then give them to blockdev.
 * Otherwise,
 *     a. preallocate requested block addresses
 *     b. do not use extent cache for better performance
 *     c. give the block addresses to blockdev
992
 */
C
Chao Yu 已提交
993
int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
C
Chao Yu 已提交
994
						int create, int flag)
995
{
J
Jaegeuk Kim 已提交
996
	unsigned int maxblocks = map->m_len;
997
	struct dnode_of_data dn;
998
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
999
	int mode = create ? ALLOC_NODE : LOOKUP_NODE;
1000
	pgoff_t pgofs, end_offset, end;
1001
	int err = 0, ofs = 1;
1002 1003
	unsigned int ofs_in_node, last_ofs_in_node;
	blkcnt_t prealloc;
1004
	struct extent_info ei = {0,0,0};
1005
	block_t blkaddr;
1006
	unsigned int start_pgofs;
1007

1008 1009 1010
	if (!maxblocks)
		return 0;

J
Jaegeuk Kim 已提交
1011 1012 1013 1014 1015
	map->m_len = 0;
	map->m_flags = 0;

	/* it only supports block size == page size */
	pgofs =	(pgoff_t)map->m_lblk;
1016
	end = pgofs + maxblocks;
1017

1018
	if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
J
Jaegeuk Kim 已提交
1019 1020 1021
		map->m_pblk = ei.blk + pgofs - ei.fofs;
		map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
		map->m_flags = F2FS_MAP_MAPPED;
1022 1023
		if (map->m_next_extent)
			*map->m_next_extent = pgofs + map->m_len;
1024
		goto out;
1025
	}
1026

C
Chao Yu 已提交
1027
next_dnode:
1028
	if (create)
1029
		__do_map_lock(sbi, flag, true);
1030 1031 1032

	/* When reading holes, we need its node page */
	set_new_dnode(&dn, inode, NULL, NULL, 0);
C
Chao Yu 已提交
1033
	err = f2fs_get_dnode_of_data(&dn, pgofs, mode);
1034
	if (err) {
C
Chao Yu 已提交
1035 1036
		if (flag == F2FS_GET_BLOCK_BMAP)
			map->m_pblk = 0;
1037
		if (err == -ENOENT) {
1038
			err = 0;
1039 1040
			if (map->m_next_pgofs)
				*map->m_next_pgofs =
C
Chao Yu 已提交
1041
					f2fs_get_next_page_offset(&dn, pgofs);
1042 1043
			if (map->m_next_extent)
				*map->m_next_extent =
C
Chao Yu 已提交
1044
					f2fs_get_next_page_offset(&dn, pgofs);
1045
		}
1046
		goto unlock_out;
1047
	}
C
Chao Yu 已提交
1048

1049
	start_pgofs = pgofs;
1050
	prealloc = 0;
1051
	last_ofs_in_node = ofs_in_node = dn.ofs_in_node;
1052
	end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
C
Chao Yu 已提交
1053 1054

next_block:
1055
	blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node);
C
Chao Yu 已提交
1056

1057 1058 1059 1060 1061 1062
	if (__is_valid_data_blkaddr(blkaddr) &&
		!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
		err = -EFAULT;
		goto sync_out;
	}

1063
	if (!is_valid_data_blkaddr(sbi, blkaddr)) {
C
Chao Yu 已提交
1064
		if (create) {
1065 1066
			if (unlikely(f2fs_cp_error(sbi))) {
				err = -EIO;
C
Chao Yu 已提交
1067
				goto sync_out;
1068
			}
1069
			if (flag == F2FS_GET_BLOCK_PRE_AIO) {
1070 1071 1072 1073
				if (blkaddr == NULL_ADDR) {
					prealloc++;
					last_ofs_in_node = dn.ofs_in_node;
				}
1074
			} else {
1075 1076
				err = __allocate_data_block(&dn,
							map->m_seg_type);
1077
				if (!err)
1078
					set_inode_flag(inode, FI_APPEND_WRITE);
1079
			}
C
Chao Yu 已提交
1080
			if (err)
C
Chao Yu 已提交
1081
				goto sync_out;
1082
			map->m_flags |= F2FS_MAP_NEW;
C
Chao Yu 已提交
1083
			blkaddr = dn.data_blkaddr;
C
Chao Yu 已提交
1084
		} else {
C
Chao Yu 已提交
1085 1086 1087 1088
			if (flag == F2FS_GET_BLOCK_BMAP) {
				map->m_pblk = 0;
				goto sync_out;
			}
1089 1090
			if (flag == F2FS_GET_BLOCK_PRECACHE)
				goto sync_out;
1091 1092 1093 1094
			if (flag == F2FS_GET_BLOCK_FIEMAP &&
						blkaddr == NULL_ADDR) {
				if (map->m_next_pgofs)
					*map->m_next_pgofs = pgofs + 1;
C
Chao Yu 已提交
1095
				goto sync_out;
1096
			}
1097 1098 1099 1100
			if (flag != F2FS_GET_BLOCK_FIEMAP) {
				/* for defragment case */
				if (map->m_next_pgofs)
					*map->m_next_pgofs = pgofs + 1;
C
Chao Yu 已提交
1101
				goto sync_out;
1102
			}
C
Chao Yu 已提交
1103 1104
		}
	}
1105

1106 1107 1108
	if (flag == F2FS_GET_BLOCK_PRE_AIO)
		goto skip;

C
Chao Yu 已提交
1109 1110 1111 1112 1113 1114 1115 1116 1117 1118
	if (map->m_len == 0) {
		/* preallocated unwritten block should be mapped for fiemap. */
		if (blkaddr == NEW_ADDR)
			map->m_flags |= F2FS_MAP_UNWRITTEN;
		map->m_flags |= F2FS_MAP_MAPPED;

		map->m_pblk = blkaddr;
		map->m_len = 1;
	} else if ((map->m_pblk != NEW_ADDR &&
			blkaddr == (map->m_pblk + ofs)) ||
1119
			(map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
1120
			flag == F2FS_GET_BLOCK_PRE_DIO) {
C
Chao Yu 已提交
1121 1122 1123 1124 1125
		ofs++;
		map->m_len++;
	} else {
		goto sync_out;
	}
1126

1127
skip:
1128 1129 1130
	dn.ofs_in_node++;
	pgofs++;

1131 1132 1133
	/* preallocate blocks in batch for one dnode page */
	if (flag == F2FS_GET_BLOCK_PRE_AIO &&
			(pgofs == end || dn.ofs_in_node == end_offset)) {
1134

1135
		dn.ofs_in_node = ofs_in_node;
C
Chao Yu 已提交
1136
		err = f2fs_reserve_new_blocks(&dn, prealloc);
1137 1138
		if (err)
			goto sync_out;
1139

1140 1141 1142 1143
		map->m_len += dn.ofs_in_node - ofs_in_node;
		if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
			err = -ENOSPC;
			goto sync_out;
1144
		}
1145 1146 1147 1148 1149 1150 1151 1152
		dn.ofs_in_node = end_offset;
	}

	if (pgofs >= end)
		goto sync_out;
	else if (dn.ofs_in_node < end_offset)
		goto next_block;

1153 1154 1155 1156 1157 1158 1159 1160 1161 1162
	if (flag == F2FS_GET_BLOCK_PRECACHE) {
		if (map->m_flags & F2FS_MAP_MAPPED) {
			unsigned int ofs = start_pgofs - map->m_lblk;

			f2fs_update_extent_cache_range(&dn,
				start_pgofs, map->m_pblk + ofs,
				map->m_len - ofs);
		}
	}

1163 1164 1165
	f2fs_put_dnode(&dn);

	if (create) {
1166
		__do_map_lock(sbi, flag, false);
1167
		f2fs_balance_fs(sbi, dn.node_changed);
1168
	}
1169
	goto next_dnode;
1170

1171
sync_out:
1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182
	if (flag == F2FS_GET_BLOCK_PRECACHE) {
		if (map->m_flags & F2FS_MAP_MAPPED) {
			unsigned int ofs = start_pgofs - map->m_lblk;

			f2fs_update_extent_cache_range(&dn,
				start_pgofs, map->m_pblk + ofs,
				map->m_len - ofs);
		}
		if (map->m_next_extent)
			*map->m_next_extent = pgofs + 1;
	}
1183
	f2fs_put_dnode(&dn);
1184
unlock_out:
1185
	if (create) {
1186
		__do_map_lock(sbi, flag, false);
1187
		f2fs_balance_fs(sbi, dn.node_changed);
1188
	}
1189
out:
J
Jaegeuk Kim 已提交
1190
	trace_f2fs_map_blocks(inode, map, err);
1191
	return err;
1192 1193
}

H
Hyunchul Lee 已提交
1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218
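/* return true only when [pos, pos + len) lies within i_size and is fully mapped */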
bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len)
{
	struct f2fs_map_blocks map;
	block_t last_lblk;
	int err;

	if (pos + len > i_size_read(inode))
		return false;

	map.m_lblk = F2FS_BYTES_TO_BLK(pos);
	map.m_next_pgofs = NULL;
	map.m_next_extent = NULL;
	map.m_seg_type = NO_CHECK_TYPE;
	last_lblk = F2FS_BLK_ALIGN(pos + len);

	while (map.m_lblk < last_lblk) {
		map.m_len = last_lblk - map.m_lblk;
		err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT);
		if (err || map.m_len == 0)
			return false;
		map.m_lblk += map.m_len;
	}
	return true;
}

J
Jaegeuk Kim 已提交
1219
static int __get_data_block(struct inode *inode, sector_t iblock,
1220
			struct buffer_head *bh, int create, int flag,
1221
			pgoff_t *next_pgofs, int seg_type)
J
Jaegeuk Kim 已提交
1222 1223
{
	struct f2fs_map_blocks map;
1224
	int err;
J
Jaegeuk Kim 已提交
1225 1226 1227

	map.m_lblk = iblock;
	map.m_len = bh->b_size >> inode->i_blkbits;
1228
	map.m_next_pgofs = next_pgofs;
1229
	map.m_next_extent = NULL;
1230
	map.m_seg_type = seg_type;
J
Jaegeuk Kim 已提交
1231

1232 1233
	err = f2fs_map_blocks(inode, &map, create, flag);
	if (!err) {
J
Jaegeuk Kim 已提交
1234 1235
		map_bh(bh, inode->i_sb, map.m_pblk);
		bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
1236
		bh->b_size = (u64)map.m_len << inode->i_blkbits;
J
Jaegeuk Kim 已提交
1237
	}
1238
	return err;
J
Jaegeuk Kim 已提交
1239 1240
}

1241
static int get_data_block(struct inode *inode, sector_t iblock,
1242 1243
			struct buffer_head *bh_result, int create, int flag,
			pgoff_t *next_pgofs)
C
Chao Yu 已提交
1244
{
1245
	return __get_data_block(inode, iblock, bh_result, create,
1246 1247
							flag, next_pgofs,
							NO_CHECK_TYPE);
C
Chao Yu 已提交
1248 1249 1250
}

static int get_data_block_dio(struct inode *inode, sector_t iblock,
1251 1252
			struct buffer_head *bh_result, int create)
{
C
Chao Yu 已提交
1253
	return __get_data_block(inode, iblock, bh_result, create,
1254
						F2FS_GET_BLOCK_DEFAULT, NULL,
C
Chao Yu 已提交
1255
						f2fs_rw_hint_to_seg_type(
1256
							inode->i_write_hint));
1257 1258
}

C
Chao Yu 已提交
1259
static int get_data_block_bmap(struct inode *inode, sector_t iblock,
1260 1261
			struct buffer_head *bh_result, int create)
{
1262
	/* Block number less than F2FS MAX BLOCKS */
C
Chao Yu 已提交
1263
	if (unlikely(iblock >= F2FS_I_SB(inode)->max_file_blocks))
1264 1265
		return -EFBIG;

C
Chao Yu 已提交
1266
	return __get_data_block(inode, iblock, bh_result, create,
1267 1268
						F2FS_GET_BLOCK_BMAP, NULL,
						NO_CHECK_TYPE);
1269 1270
}

1271 1272 1273 1274 1275 1276 1277 1278 1279 1280
static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
{
	return (offset >> inode->i_blkbits);
}

static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
{
	return (blk << inode->i_blkbits);
}

C
Chao Yu 已提交
1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299
static int f2fs_xattr_fiemap(struct inode *inode,
				struct fiemap_extent_info *fieinfo)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct page *page;
	struct node_info ni;
	__u64 phys = 0, len;
	__u32 flags;
	nid_t xnid = F2FS_I(inode)->i_xattr_nid;
	int err = 0;

	if (f2fs_has_inline_xattr(inode)) {
		int offset;

		page = f2fs_grab_cache_page(NODE_MAPPING(sbi),
						inode->i_ino, false);
		if (!page)
			return -ENOMEM;

1300 1301 1302 1303 1304
		err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
		if (err) {
			f2fs_put_page(page, 1);
			return err;
		}
C
Chao Yu 已提交
1305 1306 1307 1308

		phys = (__u64)blk_to_logical(inode, ni.blk_addr);
		offset = offsetof(struct f2fs_inode, i_addr) +
					sizeof(__le32) * (DEF_ADDRS_PER_INODE -
1309
					get_inline_xattr_addrs(inode));
C
Chao Yu 已提交
1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330

		phys += offset;
		len = inline_xattr_size(inode);

		f2fs_put_page(page, 1);

		flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED;

		if (!xnid)
			flags |= FIEMAP_EXTENT_LAST;

		err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
		if (err || err == 1)
			return err;
	}

	if (xnid) {
		page = f2fs_grab_cache_page(NODE_MAPPING(sbi), xnid, false);
		if (!page)
			return -ENOMEM;

1331 1332 1333 1334 1335
		err = f2fs_get_node_info(sbi, xnid, &ni);
		if (err) {
			f2fs_put_page(page, 1);
			return err;
		}
C
Chao Yu 已提交
1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350

		phys = (__u64)blk_to_logical(inode, ni.blk_addr);
		len = inode->i_sb->s_blocksize;

		f2fs_put_page(page, 1);

		flags = FIEMAP_EXTENT_LAST;
	}

	if (phys)
		err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);

	return (err < 0 ? err : 0);
}

J
Jaegeuk Kim 已提交
1351 1352 1353
int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		u64 start, u64 len)
{
1354 1355
	struct buffer_head map_bh;
	sector_t start_blk, last_blk;
1356
	pgoff_t next_pgofs;
1357 1358 1359 1360
	u64 logical = 0, phys = 0, size = 0;
	u32 flags = 0;
	int ret = 0;

1361 1362 1363 1364 1365 1366
	if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
		ret = f2fs_precache_extents(inode);
		if (ret)
			return ret;
	}

C
Chao Yu 已提交
1367
	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR);
1368 1369 1370
	if (ret)
		return ret;

1371 1372
	inode_lock(inode);

C
Chao Yu 已提交
1373 1374 1375 1376 1377
	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
		ret = f2fs_xattr_fiemap(inode, fieinfo);
		goto out;
	}

J
Jaegeuk Kim 已提交
1378 1379 1380
	if (f2fs_has_inline_data(inode)) {
		ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
		if (ret != -EAGAIN)
1381
			goto out;
J
Jaegeuk Kim 已提交
1382 1383
	}

1384 1385 1386 1387 1388
	if (logical_to_blk(inode, len) == 0)
		len = blk_to_logical(inode, 1);

	start_blk = logical_to_blk(inode, start);
	last_blk = logical_to_blk(inode, start + len - 1);
1389

1390 1391 1392 1393
next:
	memset(&map_bh, 0, sizeof(struct buffer_head));
	map_bh.b_size = len;

C
Chao Yu 已提交
1394
	ret = get_data_block(inode, start_blk, &map_bh, 0,
1395
					F2FS_GET_BLOCK_FIEMAP, &next_pgofs);
1396 1397 1398 1399 1400
	if (ret)
		goto out;

	/* HOLE */
	if (!buffer_mapped(&map_bh)) {
1401
		start_blk = next_pgofs;
1402 1403 1404

		if (blk_to_logical(inode, start_blk) < blk_to_logical(inode,
					F2FS_I_SB(inode)->max_file_blocks))
1405
			goto prep_next;
1406

1407 1408
		flags |= FIEMAP_EXTENT_LAST;
	}
1409

1410 1411 1412 1413
	if (size) {
		if (f2fs_encrypted_inode(inode))
			flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;

1414 1415
		ret = fiemap_fill_next_extent(fieinfo, logical,
				phys, size, flags);
1416
	}
1417

1418 1419
	if (start_blk > last_blk || ret)
		goto out;
1420

1421 1422 1423 1424 1425 1426
	logical = blk_to_logical(inode, start_blk);
	phys = blk_to_logical(inode, map_bh.b_blocknr);
	size = map_bh.b_size;
	flags = 0;
	if (buffer_unwritten(&map_bh))
		flags = FIEMAP_EXTENT_UNWRITTEN;
1427

1428
	start_blk += logical_to_blk(inode, size);
1429

1430
prep_next:
1431 1432 1433 1434 1435 1436 1437 1438 1439
	cond_resched();
	if (fatal_signal_pending(current))
		ret = -EINTR;
	else
		goto next;
out:
	if (ret == 1)
		ret = 0;

A
Al Viro 已提交
1440
	inode_unlock(inode);
1441
	return ret;
J
Jaegeuk Kim 已提交
1442 1443
}

J
Jaegeuk Kim 已提交
1444 1445 1446
/*
 * This function was originally taken from fs/mpage.c, and customized for f2fs.
 * Major change was from block_size == page_size in f2fs by default.
1447 1448 1449 1450 1451
 *
 * Note that the aops->readpages() function is ONLY used for read-ahead. If
 * this function ever deviates from doing just read-ahead, it should either
 * use ->readpage() or do the necessary surgery to decouple ->readpages()
 * from read-ahead.
J
Jaegeuk Kim 已提交
1452 1453 1454
 */
static int f2fs_mpage_readpages(struct address_space *mapping,
			struct list_head *pages, struct page *page,
1455
			unsigned nr_pages, bool is_readahead)
J
Jaegeuk Kim 已提交
1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471
{
	struct bio *bio = NULL;
	sector_t last_block_in_bio = 0;
	struct inode *inode = mapping->host;
	const unsigned blkbits = inode->i_blkbits;
	const unsigned blocksize = 1 << blkbits;
	sector_t block_in_file;
	sector_t last_block;
	sector_t last_block_in_file;
	sector_t block_nr;
	struct f2fs_map_blocks map;

	map.m_pblk = 0;
	map.m_lblk = 0;
	map.m_len = 0;
	map.m_flags = 0;
1472
	map.m_next_pgofs = NULL;
1473
	map.m_next_extent = NULL;
1474
	map.m_seg_type = NO_CHECK_TYPE;
J
Jaegeuk Kim 已提交
1475

L
LiFan 已提交
1476
	for (; nr_pages; nr_pages--) {
J
Jaegeuk Kim 已提交
1477
		if (pages) {
1478
			page = list_last_entry(pages, struct page, lru);
1479 1480

			prefetchw(&page->flags);
J
Jaegeuk Kim 已提交
1481 1482
			list_del(&page->lru);
			if (add_to_page_cache_lru(page, mapping,
1483 1484
						  page->index,
						  readahead_gfp_mask(mapping)))
J
Jaegeuk Kim 已提交
1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512
				goto next_page;
		}

		block_in_file = (sector_t)page->index;
		last_block = block_in_file + nr_pages;
		last_block_in_file = (i_size_read(inode) + blocksize - 1) >>
								blkbits;
		if (last_block > last_block_in_file)
			last_block = last_block_in_file;

		/*
		 * Map blocks using the previous result first.
		 */
		if ((map.m_flags & F2FS_MAP_MAPPED) &&
				block_in_file > map.m_lblk &&
				block_in_file < (map.m_lblk + map.m_len))
			goto got_it;

		/*
		 * Then do more f2fs_map_blocks() calls until we are
		 * done with this page.
		 */
		map.m_flags = 0;

		if (block_in_file < last_block) {
			map.m_lblk = block_in_file;
			map.m_len = last_block - block_in_file;

1513
			if (f2fs_map_blocks(inode, &map, 0,
1514
						F2FS_GET_BLOCK_DEFAULT))
J
Jaegeuk Kim 已提交
1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525
				goto set_error_page;
		}
got_it:
		if ((map.m_flags & F2FS_MAP_MAPPED)) {
			block_nr = map.m_pblk + block_in_file - map.m_lblk;
			SetPageMappedToDisk(page);

			if (!PageUptodate(page) && !cleancache_get_page(page)) {
				SetPageUptodate(page);
				goto confused;
			}
1526 1527 1528 1529

			if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
								DATA_GENERIC))
				goto set_error_page;
J
Jaegeuk Kim 已提交
1530
		} else {
1531
			zero_user_segment(page, 0, PAGE_SIZE);
1532 1533
			if (!PageUptodate(page))
				SetPageUptodate(page);
J
Jaegeuk Kim 已提交
1534 1535 1536 1537 1538 1539 1540 1541
			unlock_page(page);
			goto next_page;
		}

		/*
		 * This page will go to BIO.  Do we need to send this
		 * BIO off first?
		 */
J
Jaegeuk Kim 已提交
1542 1543
		if (bio && (last_block_in_bio != block_nr - 1 ||
			!__same_bdev(F2FS_I_SB(inode), block_nr, bio))) {
J
Jaegeuk Kim 已提交
1544
submit_and_realloc:
1545
			__submit_bio(F2FS_I_SB(inode), bio, DATA);
J
Jaegeuk Kim 已提交
1546 1547 1548
			bio = NULL;
		}
		if (bio == NULL) {
1549 1550
			bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
					is_readahead ? REQ_RAHEAD : 0);
J
Jaegeuk Kim 已提交
1551 1552
			if (IS_ERR(bio)) {
				bio = NULL;
J
Jaegeuk Kim 已提交
1553
				goto set_error_page;
1554
			}
J
Jaegeuk Kim 已提交
1555 1556 1557 1558 1559 1560 1561 1562 1563
		}

		if (bio_add_page(bio, page, blocksize, 0) < blocksize)
			goto submit_and_realloc;

		last_block_in_bio = block_nr;
		goto next_page;
set_error_page:
		SetPageError(page);
1564
		zero_user_segment(page, 0, PAGE_SIZE);
J
Jaegeuk Kim 已提交
1565 1566 1567 1568
		unlock_page(page);
		goto next_page;
confused:
		if (bio) {
1569
			__submit_bio(F2FS_I_SB(inode), bio, DATA);
J
Jaegeuk Kim 已提交
1570 1571 1572 1573 1574
			bio = NULL;
		}
		unlock_page(page);
next_page:
		if (pages)
1575
			put_page(page);
J
Jaegeuk Kim 已提交
1576 1577 1578
	}
	BUG_ON(pages && !list_empty(pages));
	if (bio)
1579
		__submit_bio(F2FS_I_SB(inode), bio, DATA);
J
Jaegeuk Kim 已提交
1580 1581 1582
	return 0;
}

1583 1584
static int f2fs_read_data_page(struct file *file, struct page *page)
{
H
Huajun Li 已提交
1585
	struct inode *inode = page->mapping->host;
1586
	int ret = -EAGAIN;
H
Huajun Li 已提交
1587

1588 1589
	trace_f2fs_readpage(page, DATA);

A
arter97 已提交
1590
	/* If the file has inline data, try to read it directly */
H
Huajun Li 已提交
1591 1592
	if (f2fs_has_inline_data(inode))
		ret = f2fs_read_inline_data(inode, page);
1593
	if (ret == -EAGAIN)
1594
		ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1, false);
H
Huajun Li 已提交
1595
	return ret;
1596 1597 1598 1599 1600 1601
}

static int f2fs_read_data_pages(struct file *file,
			struct address_space *mapping,
			struct list_head *pages, unsigned nr_pages)
{
1602
	struct inode *inode = mapping->host;
1603
	struct page *page = list_last_entry(pages, struct page, lru);
1604 1605

	trace_f2fs_readpages(inode, page, nr_pages);
H
Huajun Li 已提交
1606 1607 1608 1609 1610

	/* If the file has inline data, skip readpages */
	if (f2fs_has_inline_data(inode))
		return 0;

1611
	return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages, true);
1612 1613
}

1614 1615 1616 1617 1618
static int encrypt_one_page(struct f2fs_io_info *fio)
{
	struct inode *inode = fio->page->mapping->host;
	gfp_t gfp_flags = GFP_NOFS;

1619
	if (!f2fs_encrypted_file(inode))
1620 1621
		return 0;

1622
	/* wait for GCed page writeback via META_MAPPING */
1623
	f2fs_wait_on_block_writeback(fio->sbi, fio->old_blkaddr);
1624 1625 1626 1627 1628 1629 1630 1631 1632

retry_encrypt:
	fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page,
			PAGE_SIZE, 0, fio->page->index, gfp_flags);
	if (!IS_ERR(fio->encrypted_page))
		return 0;

	/* flush pending IOs and wait for a while in the ENOMEM case */
	if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
1633
		f2fs_flush_merged_writes(fio->sbi);
1634 1635 1636 1637 1638 1639 1640
		congestion_wait(BLK_RW_ASYNC, HZ/50);
		gfp_flags |= __GFP_NOFAIL;
		goto retry_encrypt;
	}
	return PTR_ERR(fio->encrypted_page);
}

C
Chao Yu 已提交
1641 1642
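/* SM_I(sbi)->ipu_policy is a bitmask of the F2FS_IPU_* conditions tested below */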
static inline bool check_inplace_update_policy(struct inode *inode,
				struct f2fs_io_info *fio)
1643
{
C
Chao Yu 已提交
1644 1645
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	unsigned int policy = SM_I(sbi)->ipu_policy;
1646

C
Chao Yu 已提交
1647 1648
	if (policy & (0x1 << F2FS_IPU_FORCE))
		return true;
C
Chao Yu 已提交
1649
	if (policy & (0x1 << F2FS_IPU_SSR) && f2fs_need_SSR(sbi))
C
Chao Yu 已提交
1650 1651 1652 1653
		return true;
	if (policy & (0x1 << F2FS_IPU_UTIL) &&
			utilization(sbi) > SM_I(sbi)->min_ipu_util)
		return true;
C
Chao Yu 已提交
1654
	if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && f2fs_need_SSR(sbi) &&
C
Chao Yu 已提交
1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674
			utilization(sbi) > SM_I(sbi)->min_ipu_util)
		return true;

	/*
	 * IPU for rewrite async pages
	 */
	if (policy & (0x1 << F2FS_IPU_ASYNC) &&
			fio && fio->op == REQ_OP_WRITE &&
			!(fio->op_flags & REQ_SYNC) &&
			!f2fs_encrypted_inode(inode))
		return true;

	/* this is only set during fdatasync */
	if (policy & (0x1 << F2FS_IPU_FSYNC) &&
			is_inode_flag_set(inode, FI_NEED_IPU))
		return true;

	return false;
}

C
Chao Yu 已提交
1675
bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio)
C
Chao Yu 已提交
1676
{
1677 1678
	if (f2fs_is_pinned_file(inode))
		return true;
C
Chao Yu 已提交
1679 1680 1681 1682 1683 1684 1685 1686

	/* if this is cold file, we should overwrite to avoid fragmentation */
	if (file_is_cold(inode))
		return true;

	return check_inplace_update_policy(inode, fio);
}

C
Chao Yu 已提交
1687
bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
C
Chao Yu 已提交
1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	if (test_opt(sbi, LFS))
		return true;
	if (S_ISDIR(inode->i_mode))
		return true;
	if (f2fs_is_atomic_file(inode))
		return true;
	if (fio) {
		if (is_cold_data(fio->page))
			return true;
		if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
			return true;
	}
	return false;
}

1706 1707 1708 1709
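/* out-of-place conditions take precedence over the in-place update policy */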
static inline bool need_inplace_update(struct f2fs_io_info *fio)
{
	struct inode *inode = fio->page->mapping->host;

C
Chao Yu 已提交
1710
	if (f2fs_should_update_outplace(inode, fio))
1711 1712
		return false;

C
Chao Yu 已提交
1713
	return f2fs_should_update_inplace(inode, fio);
1714 1715
}

C
Chao Yu 已提交
1716
int f2fs_do_write_data_page(struct f2fs_io_info *fio)
1717
{
1718
	struct page *page = fio->page;
1719 1720
	struct inode *inode = page->mapping->host;
	struct dnode_of_data dn;
1721
	struct extent_info ei = {0,0,0};
1722
	struct node_info ni;
1723
	bool ipu_force = false;
1724 1725 1726
	int err = 0;

	set_new_dnode(&dn, inode, NULL, NULL, 0);
1727 1728 1729
	if (need_inplace_update(fio) &&
			f2fs_lookup_extent_cache(inode, page->index, &ei)) {
		fio->old_blkaddr = ei.blk + page->index - ei.fofs;
1730

1731 1732 1733 1734 1735 1736 1737
		if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
							DATA_GENERIC))
			return -EFAULT;

		ipu_force = true;
		fio->need_lock = LOCK_DONE;
		goto got_it;
1738
	}
1739

1740 1741 1742
	/* Deadlock due to between page->lock and f2fs_lock_op */
	if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi))
		return -EAGAIN;
1743

C
Chao Yu 已提交
1744
	err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
1745
	if (err)
1746
		goto out;
1747

1748
	fio->old_blkaddr = dn.data_blkaddr;
1749 1750

	/* This page is already truncated */
1751
	if (fio->old_blkaddr == NULL_ADDR) {
1752
		ClearPageUptodate(page);
1753
		goto out_writepage;
1754
	}
1755
got_it:
1756 1757 1758 1759 1760 1761
	if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
		!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
							DATA_GENERIC)) {
		err = -EFAULT;
		goto out_writepage;
	}
1762 1763 1764 1765
	/*
	 * If current allocation needs SSR,
	 * it had better in-place writes for updated data.
	 */
1766
	if (ipu_force || (is_valid_data_blkaddr(fio->sbi, fio->old_blkaddr) &&
C
Chao Yu 已提交
1767
					need_inplace_update(fio))) {
1768 1769 1770 1771 1772
		err = encrypt_one_page(fio);
		if (err)
			goto out_writepage;

		set_page_writeback(page);
J
Jaegeuk Kim 已提交
1773
		ClearPageError(page);
1774
		f2fs_put_dnode(&dn);
1775
		if (fio->need_lock == LOCK_REQ)
1776
			f2fs_unlock_op(fio->sbi);
C
Chao Yu 已提交
1777
		err = f2fs_inplace_write_data(fio);
1778
		trace_f2fs_do_write_data_page(fio->page, IPU);
1779
		set_inode_flag(inode, FI_UPDATE_WRITE);
1780
		return err;
1781
	}
1782

1783 1784 1785 1786 1787 1788 1789 1790
	if (fio->need_lock == LOCK_RETRY) {
		if (!f2fs_trylock_op(fio->sbi)) {
			err = -EAGAIN;
			goto out_writepage;
		}
		fio->need_lock = LOCK_REQ;
	}

1791 1792 1793 1794 1795 1796
	err = f2fs_get_node_info(fio->sbi, dn.nid, &ni);
	if (err)
		goto out_writepage;

	fio->version = ni.version;

1797 1798 1799 1800 1801
	err = encrypt_one_page(fio);
	if (err)
		goto out_writepage;

	set_page_writeback(page);
J
Jaegeuk Kim 已提交
1802
	ClearPageError(page);
1803

1804
	/* LFS mode write path */
C
Chao Yu 已提交
1805
	f2fs_outplace_write_data(&dn, fio);
1806 1807 1808 1809
	trace_f2fs_do_write_data_page(page, OPU);
	set_inode_flag(inode, FI_APPEND_WRITE);
	if (page->index == 0)
		set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
1810 1811
out_writepage:
	f2fs_put_dnode(&dn);
1812
out:
1813
	if (fio->need_lock == LOCK_REQ)
1814
		f2fs_unlock_op(fio->sbi);
1815 1816 1817
	return err;
}

1818
static int __write_data_page(struct page *page, bool *submitted,
C
Chao Yu 已提交
1819 1820
				struct writeback_control *wbc,
				enum iostat_type io_type)
1821 1822
{
	struct inode *inode = page->mapping->host;
1823
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1824 1825
	loff_t i_size = i_size_read(inode);
	const pgoff_t end_index = ((unsigned long long) i_size)
1826
							>> PAGE_SHIFT;
1827
	loff_t psize = (page->index + 1) << PAGE_SHIFT;
H
Huajun Li 已提交
1828
	unsigned offset = 0;
1829
	bool need_balance_fs = false;
1830
	int err = 0;
J
Jaegeuk Kim 已提交
1831
	struct f2fs_io_info fio = {
1832
		.sbi = sbi,
C
Chao Yu 已提交
1833
		.ino = inode->i_ino,
J
Jaegeuk Kim 已提交
1834
		.type = DATA,
M
Mike Christie 已提交
1835
		.op = REQ_OP_WRITE,
J
Jens Axboe 已提交
1836
		.op_flags = wbc_to_write_flags(wbc),
1837
		.old_blkaddr = NULL_ADDR,
1838
		.page = page,
1839
		.encrypted_page = NULL,
1840
		.submitted = false,
1841
		.need_lock = LOCK_RETRY,
C
Chao Yu 已提交
1842
		.io_type = io_type,
1843
		.io_wbc = wbc,
J
Jaegeuk Kim 已提交
1844
	};
1845

1846 1847
	trace_f2fs_writepage(page, DATA);

	/* we should bypass data pages to proceed with the kworker jobs */
	if (unlikely(f2fs_cp_error(sbi))) {
		mapping_set_error(page->mapping, -EIO);
		/*
		 * don't drop any dirty dentry pages for keeping the latest
		 * directory structure.
		 */
		if (S_ISDIR(inode->i_mode))
			goto redirty_out;
1857 1858 1859
		goto out;
	}

1860 1861 1862
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto redirty_out;

1863
	if (page->index < end_index)
1864
		goto write;
1865 1866 1867 1868 1869

	/*
	 * If the offset is out-of-range of file size,
	 * this page does not have to be written to disk.
	 */
1870
	offset = i_size & (PAGE_SIZE - 1);
1871
	if ((page->index >= end_index + 1) || !offset)
1872
		goto out;
1873

1874
	zero_user_segment(page, offset, PAGE_SIZE);
1875
write:
1876 1877
	if (f2fs_is_drop_cache(inode))
		goto out;
	/* we should not write 0'th page having journal header */
	if (f2fs_is_volatile_file(inode) && (!page->index ||
			(!wbc->for_reclaim &&
			f2fs_available_free_memory(sbi, BASE_CHECK))))
		goto redirty_out;

	/* Dentry blocks are controlled by checkpoint */
	if (S_ISDIR(inode->i_mode)) {
		fio.need_lock = LOCK_DONE;
		err = f2fs_do_write_data_page(&fio);
		goto done;
	}

	if (!wbc->for_reclaim)
		need_balance_fs = true;
	else if (has_not_enough_free_secs(sbi, 0, 0))
		goto redirty_out;
	else
		set_inode_flag(inode, FI_HOT_DATA);

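	/* try an inline write first; -EAGAIN falls through to the regular block write below */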
	err = -EAGAIN;
	if (f2fs_has_inline_data(inode)) {
		err = f2fs_write_inline_data(inode, page);
		if (!err)
			goto out;
	}

	if (err == -EAGAIN) {
		err = f2fs_do_write_data_page(&fio);
		if (err == -EAGAIN) {
			fio.need_lock = LOCK_REQ;
			err = f2fs_do_write_data_page(&fio);
		}
	}

	if (err) {
		file_set_keep_isize(inode);
	} else {
		down_write(&F2FS_I(inode)->i_sem);
		if (F2FS_I(inode)->last_disk_size < psize)
			F2FS_I(inode)->last_disk_size = psize;
		up_write(&F2FS_I(inode)->i_sem);
	}

done:
	if (err && err != -ENOENT)
		goto redirty_out;

out:
	inode_dec_dirty_pages(inode);
	if (err)
		ClearPageUptodate(page);

	if (wbc->for_reclaim) {
		f2fs_submit_merged_write_cond(sbi, inode, 0, page->index, DATA);
		clear_inode_flag(inode, FI_HOT_DATA);
		f2fs_remove_dirty_inode(inode);
		submitted = NULL;
	}

	unlock_page(page);
	if (!S_ISDIR(inode->i_mode))
		f2fs_balance_fs(sbi, need_balance_fs);

	if (unlikely(f2fs_cp_error(sbi))) {
		f2fs_submit_merged_write(sbi, DATA);
		submitted = NULL;
	}

	if (submitted)
		*submitted = fio.submitted;

	return 0;

redirty_out:
	redirty_page_for_writepage(wbc, page);
	/*
	 * pageout() in MM translates EAGAIN, so calls handle_write_error()
	 * -> mapping_set_error() -> set_bit(AS_EIO, ...).
	 * file_write_and_wait_range() will see EIO error, which is critical
	 * to return value of fsync() followed by atomic_write failure to user.
	 */
	if (!err || wbc->for_reclaim)
		return AOP_WRITEPAGE_ACTIVATE;
	unlock_page(page);
	return err;
}

static int f2fs_write_data_page(struct page *page,
					struct writeback_control *wbc)
{
	return __write_data_page(page, NULL, wbc, FS_DATA_IO);
}

/*
 * This function was copied from write_cache_pages in mm/page-writeback.c.
 * The major change is making write step of cold data page separately from
 * warm/hot data page.
 */
static int f2fs_write_cache_pages(struct address_space *mapping,
					struct writeback_control *wbc,
					enum iostat_type io_type)
{
	int ret = 0;
	int done = 0;
	struct pagevec pvec;
	struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
	int nr_pages;
	pgoff_t uninitialized_var(writeback_index);
	pgoff_t index;
	pgoff_t end;		/* Inclusive */
	pgoff_t done_index;
	pgoff_t last_idx = ULONG_MAX;
	int cycled;
	int range_whole = 0;
	int tag;

	pagevec_init(&pvec);

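	/* inodes with only a few dirty pages are treated as hot so their data goes to the hot log */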
	if (get_dirty_pages(mapping->host) <=
				SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
		set_inode_flag(mapping->host, FI_HOT_DATA);
	else
		clear_inode_flag(mapping->host, FI_HOT_DATA);

	if (wbc->range_cyclic) {
		writeback_index = mapping->writeback_index; /* prev offset */
		index = writeback_index;
		if (index == 0)
			cycled = 1;
		else
			cycled = 0;
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_SHIFT;
		end = wbc->range_end >> PAGE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
		cycled = 1; /* ignore range_cyclic tests */
	}
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag = PAGECACHE_TAG_TOWRITE;
	else
		tag = PAGECACHE_TAG_DIRTY;
retry:
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag_pages_for_writeback(mapping, index, end);
	done_index = index;
	while (!done && (index <= end)) {
		int i;

		nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
				tag);
		if (nr_pages == 0)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];
			bool submitted = false;

			/* give a priority to WB_SYNC threads */
			if (atomic_read(&sbi->wb_sync_req[DATA]) &&
					wbc->sync_mode == WB_SYNC_NONE) {
				done = 1;
				break;
			}

			done_index = page->index;
retry_write:
			lock_page(page);

			if (unlikely(page->mapping != mapping)) {
continue_unlock:
				unlock_page(page);
				continue;
			}

			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			if (PageWriteback(page)) {
				if (wbc->sync_mode != WB_SYNC_NONE)
					f2fs_wait_on_page_writeback(page,
								DATA, true);
				else
					goto continue_unlock;
			}

			BUG_ON(PageWriteback(page));
			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			ret = __write_data_page(page, &submitted, wbc, io_type);
			if (unlikely(ret)) {
				/*
				 * keep nr_to_write, since vfs uses this to
				 * get # of written pages.
				 */
				if (ret == AOP_WRITEPAGE_ACTIVATE) {
					unlock_page(page);
					ret = 0;
					continue;
				} else if (ret == -EAGAIN) {
					ret = 0;
					if (wbc->sync_mode == WB_SYNC_ALL) {
						cond_resched();
						congestion_wait(BLK_RW_ASYNC,
									HZ/50);
						goto retry_write;
					}
					continue;
				}
				done_index = page->index + 1;
				done = 1;
				break;
			} else if (submitted) {
				last_idx = page->index;
			}

			if (--wbc->nr_to_write <= 0 &&
					wbc->sync_mode == WB_SYNC_NONE) {
				done = 1;
				break;
			}
		}
		pagevec_release(&pvec);
		cond_resched();
	}

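	/* for range_cyclic writeback, wrap around once to cover pages before the starting index */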
	if (!cycled && !done) {
		cycled = 1;
		index = 0;
		end = writeback_index - 1;
		goto retry;
	}
	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = done_index;

	if (last_idx != ULONG_MAX)
		f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host,
						0, last_idx, DATA);

	return ret;
}

static int __f2fs_write_data_pages(struct address_space *mapping,
						struct writeback_control *wbc,
						enum iostat_type io_type)
{
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct blk_plug plug;
	int ret;

	/* deal with chardevs and other special file */
	if (!mapping->a_ops->writepage)
		return 0;

	/* skip writing if there is no dirty page in this inode */
	if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE)
		return 0;

	/* during POR, we don't need to trigger writepage at all. */
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto skip_write;

	if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
			get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
			f2fs_available_free_memory(sbi, DIRTY_DENTS))
		goto skip_write;

	/* skip writing during file defragment */
	if (is_inode_flag_set(inode, FI_DO_DEFRAG))
		goto skip_write;

	trace_f2fs_writepages(mapping->host, wbc, DATA);

	/* to avoid splitting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
	if (wbc->sync_mode == WB_SYNC_ALL)
		atomic_inc(&sbi->wb_sync_req[DATA]);
	else if (atomic_read(&sbi->wb_sync_req[DATA]))
		goto skip_write;

	blk_start_plug(&plug);
	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
	blk_finish_plug(&plug);

	if (wbc->sync_mode == WB_SYNC_ALL)
		atomic_dec(&sbi->wb_sync_req[DATA]);
	/*
	 * if some pages were truncated, we cannot rely on mapping->host
	 * to detect the pending bios.
	 */

	f2fs_remove_dirty_inode(inode);
	return ret;

skip_write:
	wbc->pages_skipped += get_dirty_pages(inode);
	trace_f2fs_writepages(mapping->host, wbc, DATA);
	return 0;
}

static int f2fs_write_data_pages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;

	return __f2fs_write_data_pages(mapping, wbc,
			F2FS_I(inode)->cp_task == current ?
			FS_CP_DATA_IO : FS_DATA_IO);
}

static void f2fs_write_failed(struct address_space *mapping, loff_t to)
{
	struct inode *inode = mapping->host;
	loff_t i_size = i_size_read(inode);

	if (to > i_size) {
		down_write(&F2FS_I(inode)->i_mmap_sem);
		down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);

		truncate_pagecache(inode, i_size);
		f2fs_truncate_blocks(inode, i_size, true);

		up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
		up_write(&F2FS_I(inode)->i_mmap_sem);
	}
}

static int prepare_write_begin(struct f2fs_sb_info *sbi,
			struct page *page, loff_t pos, unsigned len,
			block_t *blk_addr, bool *node_changed)
{
	struct inode *inode = page->mapping->host;
	pgoff_t index = page->index;
	struct dnode_of_data dn;
	struct page *ipage;
	bool locked = false;
	struct extent_info ei = {0,0,0};
	int err = 0;

	/*
	 * we already allocated all the blocks, so we don't need to get
	 * the block addresses when there is no need to fill the page.
	 */
	if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE &&
			!is_inode_flag_set(inode, FI_NO_PREALLOC))
		return 0;

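	/* inline data or writes beyond i_size may need block allocation, so take the map lock up front */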
	if (f2fs_has_inline_data(inode) ||
			(pos & PAGE_MASK) >= i_size_read(inode)) {
		__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
		locked = true;
	}
restart:
	/* check inline_data */
	ipage = f2fs_get_node_page(sbi, inode->i_ino);
	if (IS_ERR(ipage)) {
		err = PTR_ERR(ipage);
		goto unlock_out;
	}

	set_new_dnode(&dn, inode, ipage, ipage, 0);

	if (f2fs_has_inline_data(inode)) {
		if (pos + len <= MAX_INLINE_DATA(inode)) {
			f2fs_do_read_inline_data(page, ipage);
			set_inode_flag(inode, FI_DATA_EXIST);
			if (inode->i_nlink)
				set_inline_node(ipage);
		} else {
			err = f2fs_convert_inline_page(&dn, page);
			if (err)
				goto out;
			if (dn.data_blkaddr == NULL_ADDR)
				err = f2fs_get_block(&dn, index);
		}
	} else if (locked) {
		err = f2fs_get_block(&dn, index);
	} else {
		if (f2fs_lookup_extent_cache(inode, index, &ei)) {
			dn.data_blkaddr = ei.blk + index - ei.fofs;
		} else {
			/* hole case */
			err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
			if (err || dn.data_blkaddr == NULL_ADDR) {
				f2fs_put_dnode(&dn);
				__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
								true);
				locked = true;
				goto restart;
			}
		}
	}

	/* convert_inline_page can make node_changed */
	*blk_addr = dn.data_blkaddr;
	*node_changed = dn.node_changed;
out:
	f2fs_put_dnode(&dn);
unlock_out:
	if (locked)
		__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
	return err;
}

static int f2fs_write_begin(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned flags,
		struct page **pagep, void **fsdata)
{
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct page *page = NULL;
	pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT;
	bool need_balance = false, drop_atomic = false;
	block_t blkaddr = NULL_ADDR;
	int err = 0;

	trace_f2fs_write_begin(inode, pos, len, flags);

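	/* too many in-memory atomic pages, or a revoke was requested: fail and drop them on the way out */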
	if ((f2fs_is_atomic_file(inode) &&
			!f2fs_available_free_memory(sbi, INMEM_PAGES)) ||
			is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) {
		err = -ENOMEM;
		drop_atomic = true;
		goto fail;
	}

	/*
	 * We should check this at this moment to avoid deadlock on inode page
	 * and #0 page. The locking rule for inline_data conversion should be:
	 * lock_page(page #0) -> lock_page(inode_page)
	 */
	if (index != 0) {
		err = f2fs_convert_inline_inode(inode);
		if (err)
			goto fail;
	}
repeat:
	/*
	 * Do not use grab_cache_page_write_begin() to avoid deadlock due to
	 * wait_for_stable_page. We will wait for that below with our IO control.
	 */
	page = f2fs_pagecache_get_page(mapping, index,
				FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS);
	if (!page) {
		err = -ENOMEM;
		goto fail;
	}

	*pagep = page;

	err = prepare_write_begin(sbi, page, pos, len,
					&blkaddr, &need_balance);
	if (err)
		goto fail;

	if (need_balance && has_not_enough_free_secs(sbi, 0, 0)) {
		unlock_page(page);
		f2fs_balance_fs(sbi, true);
		lock_page(page);
		if (page->mapping != mapping) {
			/* The page got truncated from under us */
			f2fs_put_page(page, 1);
			goto repeat;
		}
	}

	f2fs_wait_on_page_writeback(page, DATA, false);

	/* wait for GCed page writeback via META_MAPPING */
	if (f2fs_post_read_required(inode))
		f2fs_wait_on_block_writeback(sbi, blkaddr);

	if (len == PAGE_SIZE || PageUptodate(page))
		return 0;

	if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode)) {
		zero_user_segment(page, len, PAGE_SIZE);
		return 0;
	}

	if (blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_SIZE);
		SetPageUptodate(page);
	} else {
		err = f2fs_submit_page_read(inode, page, blkaddr);
		if (err)
			goto fail;

		lock_page(page);
		if (unlikely(page->mapping != mapping)) {
			f2fs_put_page(page, 1);
			goto repeat;
		}
		if (unlikely(!PageUptodate(page))) {
			err = -EIO;
			goto fail;
		}
	}
	return 0;

fail:
	f2fs_put_page(page, 1);
	f2fs_write_failed(mapping, pos + len);
	if (drop_atomic)
		f2fs_drop_inmem_pages_all(sbi, false);
	return err;
}

static int f2fs_write_end(struct file *file,
			struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	struct inode *inode = page->mapping->host;

	trace_f2fs_write_end(inode, pos, len, copied);

	/*
	 * This should come from len == PAGE_SIZE, and we expect copied
	 * to be PAGE_SIZE. Otherwise, we treat it with zero copied and
	 * let generic_perform_write() try to copy data again through copied=0.
	 */
	if (!PageUptodate(page)) {
		if (unlikely(copied != len))
			copied = 0;
		else
			SetPageUptodate(page);
	}
	if (!copied)
		goto unlock_out;

	set_page_dirty(page);

	if (pos + copied > i_size_read(inode))
		f2fs_i_size_write(inode, pos + copied);
unlock_out:
	f2fs_put_page(page, 1);
	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
	return copied;
}

static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
			   loff_t offset)
{
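	/* returns -EINVAL for misaligned requests, 1 to fall back to buffered I/O, 0 to allow direct I/O */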
	unsigned i_blkbits = READ_ONCE(inode->i_blkbits);
	unsigned blkbits = i_blkbits;
	unsigned blocksize_mask = (1 << blkbits) - 1;
	unsigned long align = offset | iov_iter_alignment(iter);
	struct block_device *bdev = inode->i_sb->s_bdev;

	if (align & blocksize_mask) {
		if (bdev)
			blkbits = blksize_bits(bdev_logical_block_size(bdev));
		blocksize_mask = (1 << blkbits) - 1;
		if (align & blocksize_mask)
			return -EINVAL;
		return 1;
	}
	return 0;
}

static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
	struct address_space *mapping = iocb->ki_filp->f_mapping;
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	size_t count = iov_iter_count(iter);
	loff_t offset = iocb->ki_pos;
	int rw = iov_iter_rw(iter);
	int err;
	enum rw_hint hint = iocb->ki_hint;
	int whint_mode = F2FS_OPTION(sbi).whint_mode;

	err = check_direct_IO(inode, iter, offset);
	if (err)
		return err < 0 ? err : 0;

	if (f2fs_force_buffered_io(inode, rw))
		return 0;

	trace_f2fs_direct_IO_enter(inode, offset, count, rw);

	if (rw == WRITE && whint_mode == WHINT_MODE_OFF)
		iocb->ki_hint = WRITE_LIFE_NOT_SET;

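	/* GC takes i_gc_rwsem for write; IOCB_NOWAIT callers return -EAGAIN here rather than block on it */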
	if (!down_read_trylock(&F2FS_I(inode)->i_gc_rwsem[rw])) {
		if (iocb->ki_flags & IOCB_NOWAIT) {
			iocb->ki_hint = hint;
			err = -EAGAIN;
			goto out;
		}
		down_read(&F2FS_I(inode)->i_gc_rwsem[rw]);
	}

	err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio);
	up_read(&F2FS_I(inode)->i_gc_rwsem[rw]);

	if (rw == WRITE) {
		if (whint_mode == WHINT_MODE_OFF)
			iocb->ki_hint = hint;
		if (err > 0) {
			f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
									err);
			set_inode_flag(inode, FI_UPDATE_WRITE);
		} else if (err < 0) {
			f2fs_write_failed(mapping, offset + count);
		}
	}

out:
	trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);

	return err;
}

void f2fs_invalidate_page(struct page *page, unsigned int offset,
							unsigned int length)
{
	struct inode *inode = page->mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
		(offset % PAGE_SIZE || length != PAGE_SIZE))
		return;

	if (PageDirty(page)) {
		if (inode->i_ino == F2FS_META_INO(sbi)) {
			dec_page_count(sbi, F2FS_DIRTY_META);
		} else if (inode->i_ino == F2FS_NODE_INO(sbi)) {
			dec_page_count(sbi, F2FS_DIRTY_NODES);
		} else {
			inode_dec_dirty_pages(inode);
			f2fs_remove_dirty_inode(inode);
		}
	}

	/* This is atomic written page, keep Private */
	if (IS_ATOMIC_WRITTEN_PAGE(page))
		return f2fs_drop_inmem_page(inode, page);

	set_page_private(page, 0);
	ClearPagePrivate(page);
}

int f2fs_release_page(struct page *page, gfp_t wait)
{
	/* If this is dirty page, keep PagePrivate */
	if (PageDirty(page))
		return 0;

	/* This is atomic written page, keep Private */
	if (IS_ATOMIC_WRITTEN_PAGE(page))
		return 0;

	set_page_private(page, 0);
	ClearPagePrivate(page);
	return 1;
}

static int f2fs_set_data_page_dirty(struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct inode *inode = mapping->host;

	trace_f2fs_set_page_dirty(page, DATA);

	if (!PageUptodate(page))
		SetPageUptodate(page);

	/* don't retain the PG_checked flag that was set during GC */
	if (is_cold_data(page))
		clear_cold_data(page);

	if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) {
		if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
			f2fs_register_inmem_page(inode, page);
			return 1;
		}
		/*
		 * Previously, this page has been registered, we just
		 * return here.
		 */
		return 0;
	}

	if (!PageDirty(page)) {
		__set_page_dirty_nobuffers(page);
		f2fs_update_dirty_page(inode, page);
		return 1;
	}
	return 0;
}

static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
{
	struct inode *inode = mapping->host;

	if (f2fs_has_inline_data(inode))
		return 0;

	/* make sure allocating whole blocks */
	if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
		filemap_write_and_wait(mapping);

	return generic_block_bmap(mapping, block, get_data_block_bmap);
}

#ifdef CONFIG_MIGRATION
#include <linux/migrate.h>

int f2fs_migrate_page(struct address_space *mapping,
		struct page *newpage, struct page *page, enum migrate_mode mode)
{
	int rc, extra_count;
	struct f2fs_inode_info *fi = F2FS_I(mapping->host);
	bool atomic_written = IS_ATOMIC_WRITTEN_PAGE(page);

	BUG_ON(PageWriteback(page));

	/* migrating an atomic written page is safe with the inmem_lock hold */
	if (atomic_written) {
		if (mode != MIGRATE_SYNC)
			return -EBUSY;
		if (!mutex_trylock(&fi->inmem_lock))
			return -EAGAIN;
	}

	/*
	 * A reference is expected if PagePrivate set when move mapping,
	 * however F2FS breaks this for maintaining dirty page counts when
	 * truncating pages. So here adjusting the 'extra_count' makes it work.
	 */
	extra_count = (atomic_written ? 1 : 0) - page_has_private(page);
	rc = migrate_page_move_mapping(mapping, newpage,
				page, NULL, mode, extra_count);
	if (rc != MIGRATEPAGE_SUCCESS) {
		if (atomic_written)
			mutex_unlock(&fi->inmem_lock);
		return rc;
	}

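	/* an atomic-written page is tracked on the inmem list; repoint its entry at the new page */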
	if (atomic_written) {
		struct inmem_pages *cur;
		list_for_each_entry(cur, &fi->inmem_pages, list)
			if (cur->page == page) {
				cur->page = newpage;
				break;
			}
		mutex_unlock(&fi->inmem_lock);
		put_page(page);
		get_page(newpage);
	}

	if (PagePrivate(page))
		SetPagePrivate(newpage);
	set_page_private(newpage, page_private(page));

	if (mode != MIGRATE_SYNC_NO_COPY)
		migrate_page_copy(newpage, page);
	else
		migrate_page_states(newpage, page);

	return MIGRATEPAGE_SUCCESS;
}
#endif

const struct address_space_operations f2fs_dblock_aops = {
	.readpage	= f2fs_read_data_page,
	.readpages	= f2fs_read_data_pages,
	.writepage	= f2fs_write_data_page,
	.writepages	= f2fs_write_data_pages,
	.write_begin	= f2fs_write_begin,
	.write_end	= f2fs_write_end,
	.set_page_dirty	= f2fs_set_data_page_dirty,
	.invalidatepage	= f2fs_invalidate_page,
	.releasepage	= f2fs_release_page,
	.direct_IO	= f2fs_direct_IO,
	.bmap		= f2fs_bmap,
#ifdef CONFIG_MIGRATION
	.migratepage    = f2fs_migrate_page,
#endif
};

void f2fs_clear_radix_tree_dirty_tag(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	unsigned long flags;

	xa_lock_irqsave(&mapping->i_pages, flags);
	radix_tree_tag_clear(&mapping->i_pages, page_index(page),
						PAGECACHE_TAG_DIRTY);
	xa_unlock_irqrestore(&mapping->i_pages, flags);
}

int __init f2fs_init_post_read_processing(void)
{
	bio_post_read_ctx_cache = KMEM_CACHE(bio_post_read_ctx, 0);
	if (!bio_post_read_ctx_cache)
		goto fail;
	bio_post_read_ctx_pool =
		mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS,
					 bio_post_read_ctx_cache);
	if (!bio_post_read_ctx_pool)
		goto fail_free_cache;
	return 0;

fail_free_cache:
	kmem_cache_destroy(bio_post_read_ctx_cache);
fail:
	return -ENOMEM;
}

void __exit f2fs_destroy_post_read_processing(void)
{
	mempool_destroy(bio_post_read_ctx_pool);
	kmem_cache_destroy(bio_post_read_ctx_cache);
}