// SPDX-License-Identifier: GPL-2.0
/*
 * fs/f2fs/data.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/prefetch.h>
#include <linux/uio.h>
#include <linux/cleancache.h>
#include <linux/sched/signal.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "trace.h"
#include <trace/events/f2fs.h>

#define NUM_PREALLOC_POST_READ_CTXS	128

static struct kmem_cache *bio_post_read_ctx_cache;
static mempool_t *bio_post_read_ctx_pool;

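/*
 * Writeback of these pages is guaranteed to complete by the checkpoint:
 * meta/node-inode pages, directory data, atomic-file data, and cold data.
 */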
static bool __is_cp_guaranteed(struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct inode *inode;
	struct f2fs_sb_info *sbi;

	if (!mapping)
		return false;

	inode = mapping->host;
	sbi = F2FS_I_SB(inode);

	if (inode->i_ino == F2FS_META_INO(sbi) ||
			inode->i_ino ==  F2FS_NODE_INO(sbi) ||
			S_ISDIR(inode->i_mode) ||
			(S_ISREG(inode->i_mode) &&
			is_inode_flag_set(inode, FI_ATOMIC_FILE)) ||
			is_cold_data(page))
		return true;
	return false;
}

/* postprocessing steps for read bios */
enum bio_post_read_step {
	STEP_INITIAL = 0,
	STEP_DECRYPT,
};

struct bio_post_read_ctx {
	struct bio *bio;
	struct work_struct work;
	unsigned int cur_step;
	unsigned int enabled_steps;
};

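/*
 * Completion handler shared by all read paths: pages that failed any
 * post-read step (PG_error set) are left !Uptodate so they can be
 * re-read later; everything else is marked up-to-date and unlocked.
 */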
static void __read_end_io(struct bio *bio)
{
	struct page *page;
	struct bio_vec *bv;
	int i;

	bio_for_each_segment_all(bv, bio, i) {
		page = bv->bv_page;

		/* PG_error was set if any post_read step failed */
		if (bio->bi_status || PageError(page)) {
			ClearPageUptodate(page);
			/* will re-read again later */
			ClearPageError(page);
		} else {
			SetPageUptodate(page);
		}
		unlock_page(page);
	}
	if (bio->bi_private)
		mempool_free(bio->bi_private, bio_post_read_ctx_pool);
	bio_put(bio);
}

static void bio_post_read_processing(struct bio_post_read_ctx *ctx);

static void decrypt_work(struct work_struct *work)
{
	struct bio_post_read_ctx *ctx =
		container_of(work, struct bio_post_read_ctx, work);

	fscrypt_decrypt_bio(ctx->bio);

	bio_post_read_processing(ctx);
}

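/*
 * Post-read processing is a small state machine: cur_step walks the
 * bits of enabled_steps (currently only decryption), and each enabled
 * step runs from a workqueue before __read_end_io() finishes the bio.
 */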
static void bio_post_read_processing(struct bio_post_read_ctx *ctx)
{
	switch (++ctx->cur_step) {
	case STEP_DECRYPT:
		if (ctx->enabled_steps & (1 << STEP_DECRYPT)) {
			INIT_WORK(&ctx->work, decrypt_work);
			fscrypt_enqueue_decrypt_work(&ctx->work);
			return;
		}
		ctx->cur_step++;
		/* fall-through */
	default:
		__read_end_io(ctx->bio);
	}
}

static bool f2fs_bio_post_read_required(struct bio *bio)
{
	return bio->bi_private && !bio->bi_status;
}

static void f2fs_read_end_io(struct bio *bio)
{
	if (time_to_inject(F2FS_P_SB(bio_first_page_all(bio)),
						FAULT_READ_IO)) {
		f2fs_show_injection_info(FAULT_READ_IO);
		bio->bi_status = BLK_STS_IOERR;
	}

	if (f2fs_bio_post_read_required(bio)) {
		struct bio_post_read_ctx *ctx = bio->bi_private;

		ctx->cur_step = STEP_INITIAL;
		bio_post_read_processing(ctx);
		return;
	}

	__read_end_io(bio);
}

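/*
 * Write completion handler: frees the dummy pages used for IO-size
 * alignment, propagates -EIO (stopping the checkpoint for CP data),
 * drops the per-type writeback counters, and wakes checkpoint waiters.
 */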
static void f2fs_write_end_io(struct bio *bio)
{
	struct f2fs_sb_info *sbi = bio->bi_private;
	struct bio_vec *bvec;
	int i;

	if (time_to_inject(sbi, FAULT_WRITE_IO)) {
		f2fs_show_injection_info(FAULT_WRITE_IO);
		bio->bi_status = BLK_STS_IOERR;
	}

	bio_for_each_segment_all(bvec, bio, i) {
		struct page *page = bvec->bv_page;
		enum count_type type = WB_DATA_TYPE(page);

		if (IS_DUMMY_WRITTEN_PAGE(page)) {
			set_page_private(page, (unsigned long)NULL);
			ClearPagePrivate(page);
			unlock_page(page);
			mempool_free(page, sbi->write_io_dummy);

			if (unlikely(bio->bi_status))
				f2fs_stop_checkpoint(sbi, true);
			continue;
		}

		fscrypt_pullback_bio_page(&page, true);

		if (unlikely(bio->bi_status)) {
			mapping_set_error(page->mapping, -EIO);
			if (type == F2FS_WB_CP_DATA)
				f2fs_stop_checkpoint(sbi, true);
		}

		f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) &&
					page->index != nid_of_node(page));

		dec_page_count(sbi, type);
		if (f2fs_in_warm_node_list(sbi, page))
			f2fs_del_fsync_node_entry(sbi, page);
		clear_cold_data(page);
		end_page_writeback(page);
	}
	if (!get_pages(sbi, F2FS_WB_CP_DATA) &&
				wq_has_sleeper(&sbi->cp_wait))
		wake_up(&sbi->cp_wait);

	bio_put(bio);
}

/*
 * Return the block device that should serve I/O for blk_addr; if a bio
 * is given, redirect it to that device and rewrite its start sector.
 */
struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
				block_t blk_addr, struct bio *bio)
{
	struct block_device *bdev = sbi->sb->s_bdev;
	int i;

	for (i = 0; i < sbi->s_ndevs; i++) {
		if (FDEV(i).start_blk <= blk_addr &&
					FDEV(i).end_blk >= blk_addr) {
			blk_addr -= FDEV(i).start_blk;
			bdev = FDEV(i).bdev;
			break;
		}
	}
	if (bio) {
		bio_set_dev(bio, bdev);
		bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
	}
	return bdev;
}

int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	int i;

	for (i = 0; i < sbi->s_ndevs; i++)
		if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
			return i;
	return 0;
}

static bool __same_bdev(struct f2fs_sb_info *sbi,
				block_t blk_addr, struct bio *bio)
{
	struct block_device *b = f2fs_target_device(sbi, blk_addr, NULL);
	return bio->bi_disk == b->bd_disk && bio->bi_partno == b->bd_partno;
}

/*
 * Low-level block read/write IO operations.
 */
static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
				struct writeback_control *wbc,
				int npages, bool is_read,
				enum page_type type, enum temp_type temp)
{
	struct bio *bio;

	bio = f2fs_bio_alloc(sbi, npages, true);

	f2fs_target_device(sbi, blk_addr, bio);
	if (is_read) {
		bio->bi_end_io = f2fs_read_end_io;
		bio->bi_private = NULL;
	} else {
		bio->bi_end_io = f2fs_write_end_io;
		bio->bi_private = sbi;
		bio->bi_write_hint = f2fs_io_type_to_rw_hint(sbi, type, temp);
	}
	if (wbc)
		wbc_init_bio(wbc, bio);

	return bio;
}

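/*
 * For LFS-mode writes of DATA/NODE, a partially filled bio is padded
 * with dummy pages up to the F2FS_IO_SIZE boundary before submission,
 * so that the device always sees aligned, full-sized write IOs.
 */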
static inline void __submit_bio(struct f2fs_sb_info *sbi,
				struct bio *bio, enum page_type type)
{
	if (!is_read_io(bio_op(bio))) {
		unsigned int start;

		if (type != DATA && type != NODE)
			goto submit_io;

		if (test_opt(sbi, LFS) && current->plug)
			blk_finish_plug(current->plug);

		start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
		start %= F2FS_IO_SIZE(sbi);

		if (start == 0)
			goto submit_io;

		/* fill dummy pages */
		for (; start < F2FS_IO_SIZE(sbi); start++) {
			struct page *page =
				mempool_alloc(sbi->write_io_dummy,
					GFP_NOIO | __GFP_ZERO | __GFP_NOFAIL);
			f2fs_bug_on(sbi, !page);

			SetPagePrivate(page);
			set_page_private(page, (unsigned long)DUMMY_WRITTEN_PAGE);
			lock_page(page);
			if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
				f2fs_bug_on(sbi, 1);
		}
		/*
		 * In the NODE case, we lose the next block address chain.
		 * So, we need to do a checkpoint in f2fs_sync_file.
		 */
		if (type == NODE)
			set_sbi_flag(sbi, SBI_NEED_CP);
	}
submit_io:
	if (is_read_io(bio_op(bio)))
		trace_f2fs_submit_read_bio(sbi->sb, type, bio);
	else
		trace_f2fs_submit_write_bio(sbi->sb, type, bio);
	submit_bio(bio);
}

static void __submit_merged_bio(struct f2fs_bio_info *io)
{
	struct f2fs_io_info *fio = &io->fio;

	if (!io->bio)
		return;

	bio_set_op_attrs(io->bio, fio->op, fio->op_flags);

	if (is_read_io(fio->op))
		trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
	else
		trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio);

	__submit_bio(io->sbi, io->bio, fio->type);
	io->bio = NULL;
}

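/*
 * Check whether the pending merged bio already carries a page that
 * matches the given inode, page, or node ino; with no filter given,
 * any non-empty bio counts as a match.
 */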
static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
						struct page *page, nid_t ino)
{
	struct bio_vec *bvec;
	struct page *target;
	int i;

	if (!io->bio)
		return false;

	if (!inode && !page && !ino)
		return true;

	bio_for_each_segment_all(bvec, io->bio, i) {

		if (bvec->bv_page->mapping)
			target = bvec->bv_page;
		else
			target = fscrypt_control_page(bvec->bv_page);

		if (inode && inode == target->mapping->host)
			return true;
		if (page && page == target)
			return true;
		if (ino && ino == ino_of_node(target))
			return true;
	}

	return false;
}

static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode,
						struct page *page, nid_t ino,
						enum page_type type)
{
	enum page_type btype = PAGE_TYPE_OF_BIO(type);
	enum temp_type temp;
	struct f2fs_bio_info *io;
	bool ret = false;

	for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
		io = sbi->write_io[btype] + temp;

		down_read(&io->io_rwsem);
		ret = __has_merged_page(io, inode, page, ino);
		up_read(&io->io_rwsem);

		/* TODO: use HOT temp only for meta pages now. */
		if (ret || btype == META)
			break;
	}
	return ret;
}

static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
				enum page_type type, enum temp_type temp)
{
	enum page_type btype = PAGE_TYPE_OF_BIO(type);
	struct f2fs_bio_info *io = sbi->write_io[btype] + temp;

	down_write(&io->io_rwsem);

	/* change META to META_FLUSH in the checkpoint procedure */
	if (type >= META_FLUSH) {
		io->fio.type = META_FLUSH;
		io->fio.op = REQ_OP_WRITE;
		io->fio.op_flags = REQ_META | REQ_PRIO | REQ_SYNC;
		if (!test_opt(sbi, NOBARRIER))
			io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
	}
	__submit_merged_bio(io);
	up_write(&io->io_rwsem);
}

static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
				struct inode *inode, struct page *page,
				nid_t ino, enum page_type type, bool force)
{
	enum temp_type temp;

	if (!force && !has_merged_page(sbi, inode, page, ino, type))
		return;

	for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {

		__f2fs_submit_merged_write(sbi, type, temp);

		/* TODO: use HOT temp only for meta pages now. */
		if (type >= META)
			break;
	}
}

void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
{
	__submit_merged_write_cond(sbi, NULL, 0, 0, type, true);
}

void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
				struct inode *inode, struct page *page,
				nid_t ino, enum page_type type)
{
	__submit_merged_write_cond(sbi, inode, page, ino, type, false);
}

void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
{
	f2fs_submit_merged_write(sbi, DATA);
	f2fs_submit_merged_write(sbi, NODE);
	f2fs_submit_merged_write(sbi, META);
}

/*
 * Fill the locked page with data located in the block address.
 * A caller needs to unlock the page on failure.
 */
int f2fs_submit_page_bio(struct f2fs_io_info *fio)
{
	struct bio *bio;
	struct page *page = fio->encrypted_page ?
			fio->encrypted_page : fio->page;

	if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
			__is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
		return -EFAULT;

	trace_f2fs_submit_page_bio(page, fio);
	f2fs_trace_ios(fio, 0);

	/* Allocate a new bio */
	bio = __bio_alloc(fio->sbi, fio->new_blkaddr, fio->io_wbc,
				1, is_read_io(fio->op), fio->type, fio->temp);

	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
		bio_put(bio);
		return -EFAULT;
	}
	bio_set_op_attrs(bio, fio->op, fio->op_flags);

	__submit_bio(fio->sbi, bio, fio->type);

	if (!is_read_io(fio->op))
		inc_page_count(fio->sbi, WB_DATA_TYPE(fio->page));
	return 0;
}

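/*
 * Merged write path: append the page to the in-flight bio of the same
 * temperature when the block address, op and target device allow it,
 * otherwise flush that bio and start a new one.
 */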
void f2fs_submit_page_write(struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
	struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
	struct page *bio_page;

	f2fs_bug_on(sbi, is_read_io(fio->op));

	down_write(&io->io_rwsem);
next:
	if (fio->in_list) {
		spin_lock(&io->io_lock);
		if (list_empty(&io->io_list)) {
			spin_unlock(&io->io_lock);
			goto out;
		}
		fio = list_first_entry(&io->io_list,
						struct f2fs_io_info, list);
		list_del(&fio->list);
		spin_unlock(&io->io_lock);
	}

	if (__is_valid_data_blkaddr(fio->old_blkaddr))
		verify_block_addr(fio, fio->old_blkaddr);
	verify_block_addr(fio, fio->new_blkaddr);

	bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;

	/* set submitted = true as a return value */
	fio->submitted = true;

	inc_page_count(sbi, WB_DATA_TYPE(bio_page));

	if (io->bio && (io->last_block_in_bio != fio->new_blkaddr - 1 ||
	    (io->fio.op != fio->op || io->fio.op_flags != fio->op_flags) ||
			!__same_bdev(sbi, fio->new_blkaddr, io->bio)))
		__submit_merged_bio(io);
alloc_new:
	if (io->bio == NULL) {
		if ((fio->type == DATA || fio->type == NODE) &&
				fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
			dec_page_count(sbi, WB_DATA_TYPE(bio_page));
			fio->retry = true;
			goto skip;
		}
		io->bio = __bio_alloc(sbi, fio->new_blkaddr, fio->io_wbc,
						BIO_MAX_PAGES, false,
						fio->type, fio->temp);
		io->fio = *fio;
	}

	if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) {
		__submit_merged_bio(io);
		goto alloc_new;
	}

	if (fio->io_wbc)
		wbc_account_io(fio->io_wbc, bio_page, PAGE_SIZE);

	io->last_block_in_bio = fio->new_blkaddr;
	f2fs_trace_ios(fio, 0);

	trace_f2fs_submit_page_write(fio->page, fio);
skip:
	if (fio->in_list)
		goto next;
out:
	if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN))
		__submit_merged_bio(io);
	up_write(&io->io_rwsem);
}

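/*
 * Build a read bio targeted at blkaddr and, for encrypted files,
 * attach a bio_post_read_ctx so decryption runs at completion time.
 */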
static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
					unsigned nr_pages, unsigned op_flag)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct bio *bio;
	struct bio_post_read_ctx *ctx;
	unsigned int post_read_steps = 0;

	if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC))
		return ERR_PTR(-EFAULT);

	bio = f2fs_bio_alloc(sbi, min_t(int, nr_pages, BIO_MAX_PAGES), false);
	if (!bio)
		return ERR_PTR(-ENOMEM);
	f2fs_target_device(sbi, blkaddr, bio);
	bio->bi_end_io = f2fs_read_end_io;
	bio_set_op_attrs(bio, REQ_OP_READ, op_flag);

	if (f2fs_encrypted_file(inode))
		post_read_steps |= 1 << STEP_DECRYPT;
	if (post_read_steps) {
		ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
		if (!ctx) {
			bio_put(bio);
			return ERR_PTR(-ENOMEM);
		}
		ctx->bio = bio;
		ctx->enabled_steps = post_read_steps;
		bio->bi_private = ctx;
	}

	return bio;
}

/* This can handle reads of encrypted files */
static int f2fs_submit_page_read(struct inode *inode, struct page *page,
							block_t blkaddr)
{
	struct bio *bio = f2fs_grab_read_bio(inode, blkaddr, 1, 0);

	if (IS_ERR(bio))
		return PTR_ERR(bio);

	/* wait for GCed page writeback via META_MAPPING */
	f2fs_wait_on_block_writeback(inode, blkaddr);

	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
		bio_put(bio);
		return -EFAULT;
	}
	ClearPageError(page);
	__submit_bio(F2FS_I_SB(inode), bio, DATA);
	return 0;
}

static void __set_data_blkaddr(struct dnode_of_data *dn)
{
	struct f2fs_node *rn = F2FS_NODE(dn->node_page);
	__le32 *addr_array;
	int base = 0;

	if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode))
		base = get_extra_isize(dn->inode);

	/* Get physical address of data block */
	addr_array = blkaddr_in_node(rn);
	addr_array[base + dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
}

/*
 * Lock ordering for the change of data block address:
 * ->data_page
 *  ->node_page
 *    update block addresses in the node page
 */
void f2fs_set_data_blkaddr(struct dnode_of_data *dn)
{
	f2fs_wait_on_page_writeback(dn->node_page, NODE, true);
	__set_data_blkaddr(dn);
	if (set_page_dirty(dn->node_page))
		dn->node_changed = true;
}

void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
{
	dn->data_blkaddr = blkaddr;
	f2fs_set_data_blkaddr(dn);
	f2fs_update_extent_cache(dn);
}

/* dn->ofs_in_node will be returned with up-to-date last block pointer */
int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	int err;

	if (!count)
		return 0;

	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
		return -EPERM;
	if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
		return err;

	trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
						dn->ofs_in_node, count);

	f2fs_wait_on_page_writeback(dn->node_page, NODE, true);

	for (; count > 0; dn->ofs_in_node++) {
		block_t blkaddr = datablock_addr(dn->inode,
					dn->node_page, dn->ofs_in_node);
		if (blkaddr == NULL_ADDR) {
			dn->data_blkaddr = NEW_ADDR;
			__set_data_blkaddr(dn);
			count--;
		}
	}

	if (set_page_dirty(dn->node_page))
		dn->node_changed = true;
	return 0;
}

/* Should keep dn->ofs_in_node unchanged */
int f2fs_reserve_new_block(struct dnode_of_data *dn)
{
	unsigned int ofs_in_node = dn->ofs_in_node;
	int ret;

	ret = f2fs_reserve_new_blocks(dn, 1);
	dn->ofs_in_node = ofs_in_node;
	return ret;
}

int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
{
	bool need_put = dn->inode_page ? false : true;
	int err;

	err = f2fs_get_dnode_of_data(dn, index, ALLOC_NODE);
	if (err)
		return err;

	if (dn->data_blkaddr == NULL_ADDR)
		err = f2fs_reserve_new_block(dn);
	if (err || need_put)
		f2fs_put_dnode(dn);
	return err;
}

int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
{
	struct extent_info ei  = {0,0,0};
	struct inode *inode = dn->inode;

	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
		dn->data_blkaddr = ei.blk + index - ei.fofs;
		return 0;
	}

	return f2fs_reserve_block(dn, index);
}

struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
						int op_flags, bool for_write)
{
	struct address_space *mapping = inode->i_mapping;
	struct dnode_of_data dn;
	struct page *page;
	struct extent_info ei = {0,0,0};
	int err;

	page = f2fs_grab_cache_page(mapping, index, for_write);
	if (!page)
		return ERR_PTR(-ENOMEM);

	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
		dn.data_blkaddr = ei.blk + index - ei.fofs;
		goto got_it;
	}

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
	if (err)
		goto put_err;
	f2fs_put_dnode(&dn);

	if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
		err = -ENOENT;
		goto put_err;
	}
got_it:
	if (PageUptodate(page)) {
		unlock_page(page);
		return page;
	}

	/*
	 * A new dentry page is allocated but not able to be written, since its
	 * new inode page couldn't be allocated due to -ENOSPC.
	 * In such a case, its blkaddr can remain as NEW_ADDR.
	 * see, f2fs_add_link -> f2fs_get_new_data_page ->
	 * f2fs_init_inode_metadata.
	 */
	if (dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_SIZE);
		if (!PageUptodate(page))
			SetPageUptodate(page);
		unlock_page(page);
		return page;
	}

	err = f2fs_submit_page_read(inode, page, dn.data_blkaddr);
	if (err)
		goto put_err;
	return page;

put_err:
	f2fs_put_page(page, 1);
	return ERR_PTR(err);
}

struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;

	page = find_get_page(mapping, index);
	if (page && PageUptodate(page))
		return page;
	f2fs_put_page(page, 0);

	page = f2fs_get_read_data_page(inode, index, 0, false);
	if (IS_ERR(page))
		return page;

	if (PageUptodate(page))
		return page;

	wait_on_page_locked(page);
	if (unlikely(!PageUptodate(page))) {
		f2fs_put_page(page, 0);
		return ERR_PTR(-EIO);
	}
	return page;
}

/*
 * Return an error when accessing a hole: the callers (functions in
 * dir.c and GC) need to know whether this page actually exists.
 */
struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
							bool for_write)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
repeat:
	page = f2fs_get_read_data_page(inode, index, 0, for_write);
	if (IS_ERR(page))
		return page;

	/* wait for read completion */
	lock_page(page);
	if (unlikely(page->mapping != mapping)) {
		f2fs_put_page(page, 1);
		goto repeat;
	}
	if (unlikely(!PageUptodate(page))) {
		f2fs_put_page(page, 1);
		return ERR_PTR(-EIO);
	}
	return page;
}

/*
 * Caller ensures that this data page is never allocated.
 * A new zero-filled data page is allocated in the page cache.
 *
 * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
 * f2fs_unlock_op().
 * Note that, ipage is set only by make_empty_dir, and if any error occurs,
 * ipage should be released by this function.
 */
struct page *f2fs_get_new_data_page(struct inode *inode,
		struct page *ipage, pgoff_t index, bool new_i_size)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
	struct dnode_of_data dn;
	int err;

	page = f2fs_grab_cache_page(mapping, index, true);
	if (!page) {
		/*
		 * before exiting, we should make sure ipage will be released
		 * if any error occurs.
		 */
		f2fs_put_page(ipage, 1);
		return ERR_PTR(-ENOMEM);
	}

	set_new_dnode(&dn, inode, ipage, NULL, 0);
	err = f2fs_reserve_block(&dn, index);
	if (err) {
		f2fs_put_page(page, 1);
		return ERR_PTR(err);
	}
	if (!ipage)
		f2fs_put_dnode(&dn);

	if (PageUptodate(page))
		goto got_it;

	if (dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_SIZE);
		if (!PageUptodate(page))
			SetPageUptodate(page);
	} else {
		f2fs_put_page(page, 1);

		/* if ipage exists, blkaddr should be NEW_ADDR */
		f2fs_bug_on(F2FS_I_SB(inode), ipage);
		page = f2fs_get_lock_data_page(inode, index, true);
		if (IS_ERR(page))
			return page;
	}
got_it:
	if (new_i_size && i_size_read(inode) <
				((loff_t)(index + 1) << PAGE_SHIFT))
		f2fs_i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT));
	return page;
}

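/*
 * Allocate a new block (LFS-style out-of-place) for dn->ofs_in_node,
 * charging the block count if the slot was NULL_ADDR, and record the
 * new address in the node page.
 */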
static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	struct f2fs_summary sum;
	struct node_info ni;
	block_t old_blkaddr;
	blkcnt_t count = 1;
	int err;

	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
		return -EPERM;

	err = f2fs_get_node_info(sbi, dn->nid, &ni);
	if (err)
		return err;

	dn->data_blkaddr = datablock_addr(dn->inode,
				dn->node_page, dn->ofs_in_node);
	if (dn->data_blkaddr != NULL_ADDR)
		goto alloc;

	if (unlikely((err = inc_valid_block_count(sbi, dn->inode, &count))))
		return err;

alloc:
	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
	old_blkaddr = dn->data_blkaddr;
	f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
					&sum, seg_type, NULL, false);
	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
		invalidate_mapping_pages(META_MAPPING(sbi),
					old_blkaddr, old_blkaddr);
	f2fs_set_data_blkaddr(dn);

	/*
	 * i_size will be updated by direct_IO. Otherwise, we'll get stale
	 * data from unwritten block via dio_read.
	 */
	return 0;
}

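/*
 * Called ahead of buffered and direct writes: converts inline data
 * where needed and preallocates the range being written, so that most
 * block allocation happens up front rather than per page.
 */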
int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct f2fs_map_blocks map;
	int flag;
	int err = 0;
	bool direct_io = iocb->ki_flags & IOCB_DIRECT;

	/* convert inline data for Direct I/O */
	if (direct_io) {
		err = f2fs_convert_inline_inode(inode);
		if (err)
			return err;
	}

	if (is_inode_flag_set(inode, FI_NO_PREALLOC))
		return 0;

	map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
	map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from));
	if (map.m_len > map.m_lblk)
		map.m_len -= map.m_lblk;
	else
		map.m_len = 0;

	map.m_next_pgofs = NULL;
	map.m_next_extent = NULL;
	map.m_seg_type = NO_CHECK_TYPE;

	if (direct_io) {
		map.m_seg_type = f2fs_rw_hint_to_seg_type(iocb->ki_hint);
		flag = f2fs_force_buffered_io(inode, iocb, from) ?
					F2FS_GET_BLOCK_PRE_AIO :
					F2FS_GET_BLOCK_PRE_DIO;
		goto map_blocks;
	}
	if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA(inode)) {
		err = f2fs_convert_inline_inode(inode);
		if (err)
			return err;
	}
	if (f2fs_has_inline_data(inode))
		return err;

	flag = F2FS_GET_BLOCK_PRE_AIO;

map_blocks:
	err = f2fs_map_blocks(inode, &map, 1, flag);
	if (map.m_len > 0 && err == -ENOSPC) {
		if (!direct_io)
			set_inode_flag(inode, FI_NO_PREALLOC);
		err = 0;
	}
	return err;
}

void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock)
{
	if (flag == F2FS_GET_BLOCK_PRE_AIO) {
		if (lock)
			down_read(&sbi->node_change);
		else
			up_read(&sbi->node_change);
	} else {
		if (lock)
			f2fs_lock_op(sbi);
		else
			f2fs_unlock_op(sbi);
	}
}

/*
 * f2fs_map_blocks() now supports readahead/bmap/rw direct_IO with
 * f2fs_map_blocks structure.
 * If original data blocks are allocated, then give them to blockdev.
 * Otherwise,
 *     a. preallocate requested block addresses
 *     b. do not use extent cache for better performance
 *     c. give the block addresses to blockdev
 */
int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
						int create, int flag)
{
	unsigned int maxblocks = map->m_len;
	struct dnode_of_data dn;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	int mode = create ? ALLOC_NODE : LOOKUP_NODE;
	pgoff_t pgofs, end_offset, end;
	int err = 0, ofs = 1;
	unsigned int ofs_in_node, last_ofs_in_node;
	blkcnt_t prealloc;
	struct extent_info ei = {0,0,0};
	block_t blkaddr;
	unsigned int start_pgofs;

	if (!maxblocks)
		return 0;

	map->m_len = 0;
	map->m_flags = 0;

	/* it only supports block size == page size */
	pgofs =	(pgoff_t)map->m_lblk;
	end = pgofs + maxblocks;

	if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
		map->m_pblk = ei.blk + pgofs - ei.fofs;
		map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
		map->m_flags = F2FS_MAP_MAPPED;
		if (map->m_next_extent)
			*map->m_next_extent = pgofs + map->m_len;
		goto out;
	}

next_dnode:
	if (create)
		__do_map_lock(sbi, flag, true);

	/* When reading holes, we need its node page */
	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_get_dnode_of_data(&dn, pgofs, mode);
	if (err) {
		if (flag == F2FS_GET_BLOCK_BMAP)
			map->m_pblk = 0;
		if (err == -ENOENT) {
			err = 0;
			if (map->m_next_pgofs)
				*map->m_next_pgofs =
					f2fs_get_next_page_offset(&dn, pgofs);
			if (map->m_next_extent)
				*map->m_next_extent =
					f2fs_get_next_page_offset(&dn, pgofs);
		}
		goto unlock_out;
	}

	start_pgofs = pgofs;
	prealloc = 0;
	last_ofs_in_node = ofs_in_node = dn.ofs_in_node;
	end_offset = ADDRS_PER_PAGE(dn.node_page, inode);

next_block:
	blkaddr = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node);

	if (__is_valid_data_blkaddr(blkaddr) &&
		!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
		err = -EFAULT;
		goto sync_out;
	}

	if (is_valid_data_blkaddr(sbi, blkaddr)) {
		/* use out-of-place update for direct IO under LFS mode */
		if (test_opt(sbi, LFS) && create &&
				flag == F2FS_GET_BLOCK_DIO) {
			err = __allocate_data_block(&dn, map->m_seg_type);
			if (!err)
				set_inode_flag(inode, FI_APPEND_WRITE);
		}
	} else {
		if (create) {
			if (unlikely(f2fs_cp_error(sbi))) {
				err = -EIO;
				goto sync_out;
			}
			if (flag == F2FS_GET_BLOCK_PRE_AIO) {
				if (blkaddr == NULL_ADDR) {
					prealloc++;
					last_ofs_in_node = dn.ofs_in_node;
				}
			} else {
				WARN_ON(flag != F2FS_GET_BLOCK_PRE_DIO &&
					flag != F2FS_GET_BLOCK_DIO);
				err = __allocate_data_block(&dn,
							map->m_seg_type);
				if (!err)
					set_inode_flag(inode, FI_APPEND_WRITE);
			}
			if (err)
				goto sync_out;
			map->m_flags |= F2FS_MAP_NEW;
			blkaddr = dn.data_blkaddr;
		} else {
			if (flag == F2FS_GET_BLOCK_BMAP) {
				map->m_pblk = 0;
				goto sync_out;
			}
			if (flag == F2FS_GET_BLOCK_PRECACHE)
				goto sync_out;
			if (flag == F2FS_GET_BLOCK_FIEMAP &&
						blkaddr == NULL_ADDR) {
				if (map->m_next_pgofs)
					*map->m_next_pgofs = pgofs + 1;
				goto sync_out;
			}
			if (flag != F2FS_GET_BLOCK_FIEMAP) {
				/* for defragment case */
				if (map->m_next_pgofs)
					*map->m_next_pgofs = pgofs + 1;
				goto sync_out;
			}
		}
	}

	if (flag == F2FS_GET_BLOCK_PRE_AIO)
		goto skip;

	if (map->m_len == 0) {
		/* preallocated unwritten block should be mapped for fiemap. */
		if (blkaddr == NEW_ADDR)
			map->m_flags |= F2FS_MAP_UNWRITTEN;
		map->m_flags |= F2FS_MAP_MAPPED;

		map->m_pblk = blkaddr;
		map->m_len = 1;
	} else if ((map->m_pblk != NEW_ADDR &&
			blkaddr == (map->m_pblk + ofs)) ||
			(map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
			flag == F2FS_GET_BLOCK_PRE_DIO) {
		ofs++;
		map->m_len++;
	} else {
		goto sync_out;
	}

skip:
	dn.ofs_in_node++;
	pgofs++;

	/* preallocate blocks in batch for one dnode page */
	if (flag == F2FS_GET_BLOCK_PRE_AIO &&
			(pgofs == end || dn.ofs_in_node == end_offset)) {

		dn.ofs_in_node = ofs_in_node;
		err = f2fs_reserve_new_blocks(&dn, prealloc);
		if (err)
			goto sync_out;

		map->m_len += dn.ofs_in_node - ofs_in_node;
		if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
			err = -ENOSPC;
			goto sync_out;
		}
		dn.ofs_in_node = end_offset;
	}

	if (pgofs >= end)
		goto sync_out;
	else if (dn.ofs_in_node < end_offset)
		goto next_block;

	if (flag == F2FS_GET_BLOCK_PRECACHE) {
		if (map->m_flags & F2FS_MAP_MAPPED) {
			unsigned int ofs = start_pgofs - map->m_lblk;

			f2fs_update_extent_cache_range(&dn,
				start_pgofs, map->m_pblk + ofs,
				map->m_len - ofs);
		}
	}

	f2fs_put_dnode(&dn);

	if (create) {
		__do_map_lock(sbi, flag, false);
		f2fs_balance_fs(sbi, dn.node_changed);
	}
	goto next_dnode;

sync_out:
	if (flag == F2FS_GET_BLOCK_PRECACHE) {
		if (map->m_flags & F2FS_MAP_MAPPED) {
			unsigned int ofs = start_pgofs - map->m_lblk;

			f2fs_update_extent_cache_range(&dn,
				start_pgofs, map->m_pblk + ofs,
				map->m_len - ofs);
		}
		if (map->m_next_extent)
			*map->m_next_extent = pgofs + 1;
	}
	f2fs_put_dnode(&dn);
unlock_out:
	if (create) {
		__do_map_lock(sbi, flag, false);
		f2fs_balance_fs(sbi, dn.node_changed);
	}
out:
	trace_f2fs_map_blocks(inode, map, err);
	return err;
}

bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len)
{
	struct f2fs_map_blocks map;
	block_t last_lblk;
	int err;

	if (pos + len > i_size_read(inode))
		return false;

	map.m_lblk = F2FS_BYTES_TO_BLK(pos);
	map.m_next_pgofs = NULL;
	map.m_next_extent = NULL;
	map.m_seg_type = NO_CHECK_TYPE;
	last_lblk = F2FS_BLK_ALIGN(pos + len);

	while (map.m_lblk < last_lblk) {
		map.m_len = last_lblk - map.m_lblk;
		err = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT);
		if (err || map.m_len == 0)
			return false;
		map.m_lblk += map.m_len;
	}
	return true;
}

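/*
 * Bridge from f2fs_map_blocks() to the buffer_head interface used by
 * the generic block_device and direct-IO helpers.
 */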
static int __get_data_block(struct inode *inode, sector_t iblock,
			struct buffer_head *bh, int create, int flag,
			pgoff_t *next_pgofs, int seg_type)
{
	struct f2fs_map_blocks map;
	int err;

	map.m_lblk = iblock;
	map.m_len = bh->b_size >> inode->i_blkbits;
	map.m_next_pgofs = next_pgofs;
	map.m_next_extent = NULL;
	map.m_seg_type = seg_type;

	err = f2fs_map_blocks(inode, &map, create, flag);
	if (!err) {
		map_bh(bh, inode->i_sb, map.m_pblk);
		bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
		bh->b_size = (u64)map.m_len << inode->i_blkbits;
	}
	return err;
}

static int get_data_block(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create, int flag,
			pgoff_t *next_pgofs)
{
	return __get_data_block(inode, iblock, bh_result, create,
							flag, next_pgofs,
							NO_CHECK_TYPE);
}

static int get_data_block_dio(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create)
{
	return __get_data_block(inode, iblock, bh_result, create,
						F2FS_GET_BLOCK_DIO, NULL,
						f2fs_rw_hint_to_seg_type(
							inode->i_write_hint));
}

static int get_data_block_bmap(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create)
{
	/* Block number less than F2FS MAX BLOCKS */
	if (unlikely(iblock >= F2FS_I_SB(inode)->max_file_blocks))
		return -EFBIG;

	return __get_data_block(inode, iblock, bh_result, create,
						F2FS_GET_BLOCK_BMAP, NULL,
						NO_CHECK_TYPE);
}

static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
{
	return (offset >> inode->i_blkbits);
}

static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
{
	return (blk << inode->i_blkbits);
}

static int f2fs_xattr_fiemap(struct inode *inode,
				struct fiemap_extent_info *fieinfo)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct page *page;
	struct node_info ni;
	__u64 phys = 0, len;
	__u32 flags;
	nid_t xnid = F2FS_I(inode)->i_xattr_nid;
	int err = 0;

	if (f2fs_has_inline_xattr(inode)) {
		int offset;

		page = f2fs_grab_cache_page(NODE_MAPPING(sbi),
						inode->i_ino, false);
		if (!page)
			return -ENOMEM;

		err = f2fs_get_node_info(sbi, inode->i_ino, &ni);
		if (err) {
			f2fs_put_page(page, 1);
			return err;
		}

		phys = (__u64)blk_to_logical(inode, ni.blk_addr);
		offset = offsetof(struct f2fs_inode, i_addr) +
					sizeof(__le32) * (DEF_ADDRS_PER_INODE -
					get_inline_xattr_addrs(inode));

		phys += offset;
		len = inline_xattr_size(inode);

		f2fs_put_page(page, 1);

		flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED;

		if (!xnid)
			flags |= FIEMAP_EXTENT_LAST;

		err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
		if (err || err == 1)
			return err;
	}

	if (xnid) {
		page = f2fs_grab_cache_page(NODE_MAPPING(sbi), xnid, false);
		if (!page)
			return -ENOMEM;

		err = f2fs_get_node_info(sbi, xnid, &ni);
		if (err) {
			f2fs_put_page(page, 1);
			return err;
		}

		phys = (__u64)blk_to_logical(inode, ni.blk_addr);
		len = inode->i_sb->s_blocksize;

		f2fs_put_page(page, 1);

		flags = FIEMAP_EXTENT_LAST;
	}

	if (phys)
		err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);

	return (err < 0 ? err : 0);
}

int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		u64 start, u64 len)
{
	struct buffer_head map_bh;
	sector_t start_blk, last_blk;
	pgoff_t next_pgofs;
	u64 logical = 0, phys = 0, size = 0;
	u32 flags = 0;
	int ret = 0;

	if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
		ret = f2fs_precache_extents(inode);
		if (ret)
			return ret;
	}

	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC | FIEMAP_FLAG_XATTR);
	if (ret)
		return ret;

	inode_lock(inode);

	if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
		ret = f2fs_xattr_fiemap(inode, fieinfo);
		goto out;
	}

	if (f2fs_has_inline_data(inode)) {
		ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
		if (ret != -EAGAIN)
			goto out;
	}

	if (logical_to_blk(inode, len) == 0)
		len = blk_to_logical(inode, 1);

	start_blk = logical_to_blk(inode, start);
	last_blk = logical_to_blk(inode, start + len - 1);

next:
	memset(&map_bh, 0, sizeof(struct buffer_head));
	map_bh.b_size = len;

	ret = get_data_block(inode, start_blk, &map_bh, 0,
					F2FS_GET_BLOCK_FIEMAP, &next_pgofs);
	if (ret)
		goto out;

	/* HOLE */
	if (!buffer_mapped(&map_bh)) {
		start_blk = next_pgofs;

		if (blk_to_logical(inode, start_blk) < blk_to_logical(inode,
					F2FS_I_SB(inode)->max_file_blocks))
			goto prep_next;

		flags |= FIEMAP_EXTENT_LAST;
	}

	if (size) {
		if (f2fs_encrypted_inode(inode))
			flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;

		ret = fiemap_fill_next_extent(fieinfo, logical,
				phys, size, flags);
	}

	if (start_blk > last_blk || ret)
		goto out;

	logical = blk_to_logical(inode, start_blk);
	phys = blk_to_logical(inode, map_bh.b_blocknr);
	size = map_bh.b_size;
	flags = 0;
	if (buffer_unwritten(&map_bh))
		flags = FIEMAP_EXTENT_UNWRITTEN;

	start_blk += logical_to_blk(inode, size);

prep_next:
	cond_resched();
	if (fatal_signal_pending(current))
		ret = -EINTR;
	else
		goto next;
out:
	if (ret == 1)
		ret = 0;

	inode_unlock(inode);
	return ret;
}

/*
 * This function was originally taken from fs/mpage.c, and customized for f2fs.
 * Major change was from block_size == page_size in f2fs by default.
 *
 * Note that the aops->readpages() function is ONLY used for read-ahead. If
 * this function ever deviates from doing just read-ahead, it should either
 * use ->readpage() or do the necessary surgery to decouple ->readpages()
 * from read-ahead.
 */
static int f2fs_mpage_readpages(struct address_space *mapping,
			struct list_head *pages, struct page *page,
			unsigned nr_pages, bool is_readahead)
{
	struct bio *bio = NULL;
	sector_t last_block_in_bio = 0;
	struct inode *inode = mapping->host;
	const unsigned blkbits = inode->i_blkbits;
	const unsigned blocksize = 1 << blkbits;
	sector_t block_in_file;
	sector_t last_block;
	sector_t last_block_in_file;
	sector_t block_nr;
	struct f2fs_map_blocks map;

	map.m_pblk = 0;
	map.m_lblk = 0;
	map.m_len = 0;
	map.m_flags = 0;
	map.m_next_pgofs = NULL;
	map.m_next_extent = NULL;
	map.m_seg_type = NO_CHECK_TYPE;

	for (; nr_pages; nr_pages--) {
		if (pages) {
			page = list_last_entry(pages, struct page, lru);

			prefetchw(&page->flags);
			list_del(&page->lru);
			if (add_to_page_cache_lru(page, mapping,
						  page->index,
						  readahead_gfp_mask(mapping)))
				goto next_page;
		}

		block_in_file = (sector_t)page->index;
		last_block = block_in_file + nr_pages;
		last_block_in_file = (i_size_read(inode) + blocksize - 1) >>
								blkbits;
		if (last_block > last_block_in_file)
			last_block = last_block_in_file;

		/*
		 * Map blocks using the previous result first.
		 */
		if ((map.m_flags & F2FS_MAP_MAPPED) &&
				block_in_file > map.m_lblk &&
				block_in_file < (map.m_lblk + map.m_len))
			goto got_it;

		/*
		 * Then do more f2fs_map_blocks() calls until we are
		 * done with this page.
		 */
		map.m_flags = 0;

		if (block_in_file < last_block) {
			map.m_lblk = block_in_file;
			map.m_len = last_block - block_in_file;

			if (f2fs_map_blocks(inode, &map, 0,
						F2FS_GET_BLOCK_DEFAULT))
				goto set_error_page;
		}
got_it:
		if ((map.m_flags & F2FS_MAP_MAPPED)) {
			block_nr = map.m_pblk + block_in_file - map.m_lblk;
			SetPageMappedToDisk(page);

			if (!PageUptodate(page) && !cleancache_get_page(page)) {
				SetPageUptodate(page);
				goto confused;
			}

			if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
								DATA_GENERIC))
				goto set_error_page;
		} else {
			zero_user_segment(page, 0, PAGE_SIZE);
			if (!PageUptodate(page))
				SetPageUptodate(page);
			unlock_page(page);
			goto next_page;
		}

		/*
		 * This page will go to BIO.  Do we need to send this
		 * BIO off first?
		 */
		if (bio && (last_block_in_bio != block_nr - 1 ||
			!__same_bdev(F2FS_I_SB(inode), block_nr, bio))) {
submit_and_realloc:
			__submit_bio(F2FS_I_SB(inode), bio, DATA);
			bio = NULL;
		}
		if (bio == NULL) {
			bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
					is_readahead ? REQ_RAHEAD : 0);
			if (IS_ERR(bio)) {
				bio = NULL;
				goto set_error_page;
			}
		}

		/*
		 * If the page is under writeback, we need to wait for
		 * its completion to see the correct decrypted data.
		 */
		f2fs_wait_on_block_writeback(inode, block_nr);

		if (bio_add_page(bio, page, blocksize, 0) < blocksize)
			goto submit_and_realloc;

		ClearPageError(page);
		last_block_in_bio = block_nr;
		goto next_page;
set_error_page:
		SetPageError(page);
		zero_user_segment(page, 0, PAGE_SIZE);
		unlock_page(page);
		goto next_page;
confused:
		if (bio) {
			__submit_bio(F2FS_I_SB(inode), bio, DATA);
			bio = NULL;
		}
		unlock_page(page);
next_page:
		if (pages)
			put_page(page);
	}
	BUG_ON(pages && !list_empty(pages));
	if (bio)
		__submit_bio(F2FS_I_SB(inode), bio, DATA);
	return 0;
}

static int f2fs_read_data_page(struct file *file, struct page *page)
{
	struct inode *inode = page->mapping->host;
	int ret = -EAGAIN;

	trace_f2fs_readpage(page, DATA);

	/* If the file has inline data, try to read it directly */
	if (f2fs_has_inline_data(inode))
		ret = f2fs_read_inline_data(inode, page);
	if (ret == -EAGAIN)
		ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1, false);
	return ret;
}

static int f2fs_read_data_pages(struct file *file,
			struct address_space *mapping,
			struct list_head *pages, unsigned nr_pages)
{
	struct inode *inode = mapping->host;
	struct page *page = list_last_entry(pages, struct page, lru);

	trace_f2fs_readpages(inode, page, nr_pages);

	/* If the file has inline data, skip readpages */
	if (f2fs_has_inline_data(inode))
		return 0;

	return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages, true);
}

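/*
 * Encrypt one data page into a bounce page before writeback; on ENOMEM
 * the merged writes are flushed and the allocation retried with
 * __GFP_NOFAIL. A cached copy in META_MAPPING (used while the block is
 * being GCed) is kept in sync with the newly encrypted contents.
 */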
static int encrypt_one_page(struct f2fs_io_info *fio)
{
	struct inode *inode = fio->page->mapping->host;
	struct page *mpage;
	gfp_t gfp_flags = GFP_NOFS;

	if (!f2fs_encrypted_file(inode))
		return 0;

	/* wait for GCed page writeback via META_MAPPING */
	f2fs_wait_on_block_writeback(inode, fio->old_blkaddr);

retry_encrypt:
	fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page,
			PAGE_SIZE, 0, fio->page->index, gfp_flags);
	if (IS_ERR(fio->encrypted_page)) {
		/* flush pending IOs and wait for a while in the ENOMEM case */
		if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
			f2fs_flush_merged_writes(fio->sbi);
			congestion_wait(BLK_RW_ASYNC, HZ/50);
			gfp_flags |= __GFP_NOFAIL;
			goto retry_encrypt;
		}
		return PTR_ERR(fio->encrypted_page);
	}

	mpage = find_lock_page(META_MAPPING(fio->sbi), fio->old_blkaddr);
	if (mpage) {
		if (PageUptodate(mpage))
			memcpy(page_address(mpage),
				page_address(fio->encrypted_page), PAGE_SIZE);
		f2fs_put_page(mpage, 1);
	}
	return 0;
}

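/*
 * In-place-update policy: each F2FS_IPU_* bit enables one heuristic
 * (force, SSR, utilization thresholds, async rewrites, fdatasync).
 */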
static inline bool check_inplace_update_policy(struct inode *inode,
				struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	unsigned int policy = SM_I(sbi)->ipu_policy;

	if (policy & (0x1 << F2FS_IPU_FORCE))
		return true;
	if (policy & (0x1 << F2FS_IPU_SSR) && f2fs_need_SSR(sbi))
		return true;
	if (policy & (0x1 << F2FS_IPU_UTIL) &&
			utilization(sbi) > SM_I(sbi)->min_ipu_util)
		return true;
	if (policy & (0x1 << F2FS_IPU_SSR_UTIL) && f2fs_need_SSR(sbi) &&
			utilization(sbi) > SM_I(sbi)->min_ipu_util)
		return true;

	/*
	 * IPU for rewrite async pages
	 */
	if (policy & (0x1 << F2FS_IPU_ASYNC) &&
			fio && fio->op == REQ_OP_WRITE &&
			!(fio->op_flags & REQ_SYNC) &&
			!f2fs_encrypted_inode(inode))
		return true;

	/* this is only set during fdatasync */
	if (policy & (0x1 << F2FS_IPU_FSYNC) &&
			is_inode_flag_set(inode, FI_NEED_IPU))
		return true;

	return false;
}

bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio)
{
	if (f2fs_is_pinned_file(inode))
		return true;

	/* if this is a cold file, we should overwrite to avoid fragmentation */
	if (file_is_cold(inode))
		return true;

	return check_inplace_update_policy(inode, fio);
}

bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	if (test_opt(sbi, LFS))
		return true;
	if (S_ISDIR(inode->i_mode))
		return true;
	if (f2fs_is_atomic_file(inode))
		return true;
	if (fio) {
		if (is_cold_data(fio->page))
			return true;
		if (IS_ATOMIC_WRITTEN_PAGE(fio->page))
			return true;
	}
	return false;
}

static inline bool need_inplace_update(struct f2fs_io_info *fio)
{
	struct inode *inode = fio->page->mapping->host;

	if (f2fs_should_update_outplace(inode, fio))
		return false;

	return f2fs_should_update_inplace(inode, fio);
}

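/*
 * Write one data page either in place (IPU) or out of place (OPU).
 * The extent cache may supply the old block address without taking
 * f2fs_lock_op(); otherwise the dnode is looked up under fio->need_lock.
 */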
int f2fs_do_write_data_page(struct f2fs_io_info *fio)
{
	struct page *page = fio->page;
	struct inode *inode = page->mapping->host;
	struct dnode_of_data dn;
	struct extent_info ei = {0,0,0};
	struct node_info ni;
	bool ipu_force = false;
	int err = 0;

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	if (need_inplace_update(fio) &&
			f2fs_lookup_extent_cache(inode, page->index, &ei)) {
		fio->old_blkaddr = ei.blk + page->index - ei.fofs;

		if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
							DATA_GENERIC))
			return -EFAULT;

		ipu_force = true;
		fio->need_lock = LOCK_DONE;
		goto got_it;
	}

	/* avoid deadlock between page->lock and f2fs_lock_op() */
	if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi))
		return -EAGAIN;

	err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
	if (err)
		goto out;

	fio->old_blkaddr = dn.data_blkaddr;

	/* This page is already truncated */
	if (fio->old_blkaddr == NULL_ADDR) {
		ClearPageUptodate(page);
		goto out_writepage;
	}
got_it:
	if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
		!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
							DATA_GENERIC)) {
		err = -EFAULT;
		goto out_writepage;
	}
	/*
	 * If the current allocation needs SSR, it is better to write
	 * the updated data in place.
	 */
	if (ipu_force || (is_valid_data_blkaddr(fio->sbi, fio->old_blkaddr) &&
					need_inplace_update(fio))) {
		err = encrypt_one_page(fio);
		if (err)
			goto out_writepage;

		set_page_writeback(page);
		ClearPageError(page);
		f2fs_put_dnode(&dn);
		if (fio->need_lock == LOCK_REQ)
			f2fs_unlock_op(fio->sbi);
		err = f2fs_inplace_write_data(fio);
		trace_f2fs_do_write_data_page(fio->page, IPU);
		set_inode_flag(inode, FI_UPDATE_WRITE);
		return err;
	}

	if (fio->need_lock == LOCK_RETRY) {
		if (!f2fs_trylock_op(fio->sbi)) {
			err = -EAGAIN;
			goto out_writepage;
		}
		fio->need_lock = LOCK_REQ;
	}

	err = f2fs_get_node_info(fio->sbi, dn.nid, &ni);
	if (err)
		goto out_writepage;

	fio->version = ni.version;

	err = encrypt_one_page(fio);
	if (err)
		goto out_writepage;

	set_page_writeback(page);
	ClearPageError(page);

	/* LFS mode write path */
	f2fs_outplace_write_data(&dn, fio);
	trace_f2fs_do_write_data_page(page, OPU);
	set_inode_flag(inode, FI_APPEND_WRITE);
	if (page->index == 0)
		set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN);
out_writepage:
	f2fs_put_dnode(&dn);
out:
	if (fio->need_lock == LOCK_REQ)
		f2fs_unlock_op(fio->sbi);
	return err;
}

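/*
 * ->writepage worker: zeroes the tail of the last partial page, writes
 * inline data directly where possible, and falls back to
 * f2fs_do_write_data_page() with progressively stronger locking.
 */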
static int __write_data_page(struct page *page, bool *submitted,
				struct writeback_control *wbc,
				enum iostat_type io_type)
{
	struct inode *inode = page->mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	loff_t i_size = i_size_read(inode);
	const pgoff_t end_index = ((unsigned long long) i_size)
							>> PAGE_SHIFT;
	loff_t psize = (page->index + 1) << PAGE_SHIFT;
	unsigned offset = 0;
	bool need_balance_fs = false;
	int err = 0;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.ino = inode->i_ino,
		.type = DATA,
		.op = REQ_OP_WRITE,
		.op_flags = wbc_to_write_flags(wbc),
		.old_blkaddr = NULL_ADDR,
		.page = page,
		.encrypted_page = NULL,
		.submitted = false,
		.need_lock = LOCK_RETRY,
		.io_type = io_type,
		.io_wbc = wbc,
	};

	trace_f2fs_writepage(page, DATA);

	/* we should bypass data pages so that the kworker jobs can proceed */
	if (unlikely(f2fs_cp_error(sbi))) {
		mapping_set_error(page->mapping, -EIO);
		/*
		 * don't drop any dirty dentry pages, to keep the latest
		 * directory structure.
		 */
		if (S_ISDIR(inode->i_mode))
			goto redirty_out;
		goto out;
	}

	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto redirty_out;

	if (page->index < end_index)
		goto write;

	/*
	 * If the offset is out of range of the file size,
	 * this page does not have to be written to disk.
	 */
	offset = i_size & (PAGE_SIZE - 1);
	if ((page->index >= end_index + 1) || !offset)
		goto out;

	zero_user_segment(page, offset, PAGE_SIZE);
write:
	if (f2fs_is_drop_cache(inode))
		goto out;
	/* we should not write the 0'th page, which holds the journal header */
	if (f2fs_is_volatile_file(inode) && (!page->index ||
			(!wbc->for_reclaim &&
			f2fs_available_free_memory(sbi, BASE_CHECK))))
		goto redirty_out;

	/* Dentry blocks are controlled by checkpoint */
	if (S_ISDIR(inode->i_mode)) {
		fio.need_lock = LOCK_DONE;
		err = f2fs_do_write_data_page(&fio);
		goto done;
	}

	if (!wbc->for_reclaim)
		need_balance_fs = true;
	else if (has_not_enough_free_secs(sbi, 0, 0))
		goto redirty_out;
	else
		set_inode_flag(inode, FI_HOT_DATA);

	err = -EAGAIN;
	if (f2fs_has_inline_data(inode)) {
		err = f2fs_write_inline_data(inode, page);
		if (!err)
			goto out;
	}

	if (err == -EAGAIN) {
		err = f2fs_do_write_data_page(&fio);
		if (err == -EAGAIN) {
			fio.need_lock = LOCK_REQ;
			err = f2fs_do_write_data_page(&fio);
		}
	}

	if (err) {
		file_set_keep_isize(inode);
	} else {
		down_write(&F2FS_I(inode)->i_sem);
		if (F2FS_I(inode)->last_disk_size < psize)
			F2FS_I(inode)->last_disk_size = psize;
		up_write(&F2FS_I(inode)->i_sem);
	}

done:
	if (err && err != -ENOENT)
		goto redirty_out;

out:
	inode_dec_dirty_pages(inode);
	if (err)
		ClearPageUptodate(page);

	if (wbc->for_reclaim) {
		f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA);
		clear_inode_flag(inode, FI_HOT_DATA);
		f2fs_remove_dirty_inode(inode);
		submitted = NULL;
	}

	unlock_page(page);
	if (!S_ISDIR(inode->i_mode))
		f2fs_balance_fs(sbi, need_balance_fs);

	if (unlikely(f2fs_cp_error(sbi))) {
		f2fs_submit_merged_write(sbi, DATA);
		submitted = NULL;
	}

	if (submitted)
		*submitted = fio.submitted;

	return 0;

redirty_out:
	redirty_page_for_writepage(wbc, page);
	/*
	 * pageout() in MM translates EAGAIN, so it calls handle_write_error()
	 * -> mapping_set_error() -> set_bit(AS_EIO, ...).
	 * file_write_and_wait_range() will then see the EIO error, which is
	 * critical for fsync() to report an atomic_write failure to the user.
	 */
	if (!err || wbc->for_reclaim)
		return AOP_WRITEPAGE_ACTIVATE;
	unlock_page(page);
	return err;
}

static int f2fs_write_data_page(struct page *page,
					struct writeback_control *wbc)
{
	return __write_data_page(page, NULL, wbc, FS_DATA_IO);
}

/*
 * This function was copied from write_cache_pages in mm/page-writeback.c.
 * The major change is that it writes cold data pages separately from
 * warm/hot data pages.
 */
static int f2fs_write_cache_pages(struct address_space *mapping,
					struct writeback_control *wbc,
					enum iostat_type io_type)
{
	int ret = 0;
	int done = 0;
	struct pagevec pvec;
	struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
	int nr_pages;
	pgoff_t uninitialized_var(writeback_index);
	pgoff_t index;
	pgoff_t end;		/* Inclusive */
	pgoff_t done_index;
	int cycled;
	int range_whole = 0;
	int tag;
	int nwritten = 0;

	pagevec_init(&pvec);

	if (get_dirty_pages(mapping->host) <=
				SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
		set_inode_flag(mapping->host, FI_HOT_DATA);
	else
		clear_inode_flag(mapping->host, FI_HOT_DATA);

	if (wbc->range_cyclic) {
		writeback_index = mapping->writeback_index; /* prev offset */
		index = writeback_index;
		if (index == 0)
			cycled = 1;
		else
			cycled = 0;
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_SHIFT;
		end = wbc->range_end >> PAGE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
		cycled = 1; /* ignore range_cyclic tests */
	}
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag = PAGECACHE_TAG_TOWRITE;
	else
		tag = PAGECACHE_TAG_DIRTY;
retry:
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag_pages_for_writeback(mapping, index, end);
	done_index = index;
	while (!done && (index <= end)) {
		int i;

		nr_pages = pagevec_lookup_range_tag(&pvec, mapping, &index, end,
				tag);
		if (nr_pages == 0)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];
			bool submitted = false;

			/* give a priority to WB_SYNC threads */
			if (atomic_read(&sbi->wb_sync_req[DATA]) &&
					wbc->sync_mode == WB_SYNC_NONE) {
				done = 1;
				break;
			}

			done_index = page->index;
retry_write:
			lock_page(page);

			if (unlikely(page->mapping != mapping)) {
continue_unlock:
				unlock_page(page);
				continue;
			}

			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			if (PageWriteback(page)) {
				if (wbc->sync_mode != WB_SYNC_NONE)
					f2fs_wait_on_page_writeback(page,
								DATA, true);
				else
					goto continue_unlock;
			}

			BUG_ON(PageWriteback(page));
			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			ret = __write_data_page(page, &submitted, wbc, io_type);
			if (unlikely(ret)) {
				/*
				 * keep nr_to_write, since vfs uses this to
				 * get # of written pages.
				 */
				if (ret == AOP_WRITEPAGE_ACTIVATE) {
					unlock_page(page);
					ret = 0;
					continue;
				} else if (ret == -EAGAIN) {
					ret = 0;
					if (wbc->sync_mode == WB_SYNC_ALL) {
						cond_resched();
						congestion_wait(BLK_RW_ASYNC,
									HZ/50);
						goto retry_write;
					}
					continue;
				}
				done_index = page->index + 1;
				done = 1;
				break;
			} else if (submitted) {
				nwritten++;
			}

			if (--wbc->nr_to_write <= 0 &&
					wbc->sync_mode == WB_SYNC_NONE) {
				done = 1;
				break;
			}
		}
		pagevec_release(&pvec);
		cond_resched();
	}

	if (!cycled && !done) {
		cycled = 1;
		index = 0;
		end = writeback_index - 1;
		goto retry;
	}
	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = done_index;

	if (nwritten)
		f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host,
								NULL, 0, DATA);

	return ret;
}

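/*
 * Decide whether writepages should be serialized through sbi->writepages.
 * Serializing large or non-synchronous regular-file writebacks is intended
 * to keep block allocation sequential when several writers are active.
 */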
static inline bool __should_serialize_io(struct inode *inode,
					struct writeback_control *wbc)
{
	if (!S_ISREG(inode->i_mode))
		return false;
	if (wbc->sync_mode != WB_SYNC_ALL)
		return true;
	if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
		return true;
	return false;
}

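/*
 * Common data writepages path.  Every skip_write case below only defers
 * writeback: the pages remain dirty and are reported via pages_skipped.
 */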
static int __f2fs_write_data_pages(struct address_space *mapping,
						struct writeback_control *wbc,
						enum iostat_type io_type)
{
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct blk_plug plug;
	int ret;
	bool locked = false;

	/* deal with chardevs and other special files */
	if (!mapping->a_ops->writepage)
		return 0;

	/* skip writing if there is no dirty page in this inode */
	if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE)
		return 0;

	/* during POR, we don't need to trigger writepage at all. */
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto skip_write;

	if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
			get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
			f2fs_available_free_memory(sbi, DIRTY_DENTS))
		goto skip_write;

	/* skip writing during file defragment */
	if (is_inode_flag_set(inode, FI_DO_DEFRAG))
		goto skip_write;

	trace_f2fs_writepages(mapping->host, wbc, DATA);

	/* to avoid splitting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
	if (wbc->sync_mode == WB_SYNC_ALL)
		atomic_inc(&sbi->wb_sync_req[DATA]);
	else if (atomic_read(&sbi->wb_sync_req[DATA]))
		goto skip_write;

	if (__should_serialize_io(inode, wbc)) {
		mutex_lock(&sbi->writepages);
		locked = true;
	}

	blk_start_plug(&plug);
	ret = f2fs_write_cache_pages(mapping, wbc, io_type);
	blk_finish_plug(&plug);

	if (locked)
		mutex_unlock(&sbi->writepages);

	if (wbc->sync_mode == WB_SYNC_ALL)
		atomic_dec(&sbi->wb_sync_req[DATA]);
	/*
	 * if some pages were truncated, we cannot guarantee that
	 * mapping->host will detect the pending bios.
	 */

	f2fs_remove_dirty_inode(inode);
	return ret;

skip_write:
	wbc->pages_skipped += get_dirty_pages(inode);
	trace_f2fs_writepages(mapping->host, wbc, DATA);
	return 0;
}

static int f2fs_write_data_pages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;

	return __f2fs_write_data_pages(mapping, wbc,
			F2FS_I(inode)->cp_task == current ?
			FS_CP_DATA_IO : FS_DATA_IO);
}

static void f2fs_write_failed(struct address_space *mapping, loff_t to)
{
	struct inode *inode = mapping->host;
	loff_t i_size = i_size_read(inode);

	if (to > i_size) {
		down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
		down_write(&F2FS_I(inode)->i_mmap_sem);

		truncate_pagecache(inode, i_size);
		f2fs_truncate_blocks(inode, i_size, true);

		up_write(&F2FS_I(inode)->i_mmap_sem);
		up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
	}
}

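/*
 * Look up (or allocate) the block address backing the page that
 * f2fs_write_begin() is about to fill, converting inline data first when
 * the write can no longer fit inline.
 */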
static int prepare_write_begin(struct f2fs_sb_info *sbi,
			struct page *page, loff_t pos, unsigned len,
			block_t *blk_addr, bool *node_changed)
{
	struct inode *inode = page->mapping->host;
	pgoff_t index = page->index;
	struct dnode_of_data dn;
	struct page *ipage;
	bool locked = false;
	struct extent_info ei = {0, 0, 0};
	int err = 0;

	/*
	 * we already allocated all the blocks, so we don't need to get
	 * the block addresses when there is no need to fill the page.
	 */
	if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE &&
			!is_inode_flag_set(inode, FI_NO_PREALLOC))
		return 0;

	if (f2fs_has_inline_data(inode) ||
			(pos & PAGE_MASK) >= i_size_read(inode)) {
		__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true);
		locked = true;
	}
restart:
	/* check inline_data */
	ipage = f2fs_get_node_page(sbi, inode->i_ino);
	if (IS_ERR(ipage)) {
		err = PTR_ERR(ipage);
		goto unlock_out;
	}

	set_new_dnode(&dn, inode, ipage, ipage, 0);

	if (f2fs_has_inline_data(inode)) {
		if (pos + len <= MAX_INLINE_DATA(inode)) {
			f2fs_do_read_inline_data(page, ipage);
			set_inode_flag(inode, FI_DATA_EXIST);
			if (inode->i_nlink)
				set_inline_node(ipage);
		} else {
			err = f2fs_convert_inline_page(&dn, page);
			if (err)
				goto out;
			if (dn.data_blkaddr == NULL_ADDR)
				err = f2fs_get_block(&dn, index);
		}
	} else if (locked) {
		err = f2fs_get_block(&dn, index);
	} else {
		if (f2fs_lookup_extent_cache(inode, index, &ei)) {
			dn.data_blkaddr = ei.blk + index - ei.fofs;
		} else {
			/* hole case */
			err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
			if (err || dn.data_blkaddr == NULL_ADDR) {
				f2fs_put_dnode(&dn);
				__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO,
								true);
				locked = true;
				goto restart;
			}
		}
	}

	/* convert_inline_page can make node_changed */
	*blk_addr = dn.data_blkaddr;
	*node_changed = dn.node_changed;
out:
	f2fs_put_dnode(&dn);
unlock_out:
	if (locked)
		__do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false);
	return err;
}

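/* ->write_begin: pin and prepare an up-to-date page for a buffered write */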
static int f2fs_write_begin(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned flags,
		struct page **pagep, void **fsdata)
{
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct page *page = NULL;
	pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT;
	bool need_balance = false, drop_atomic = false;
	block_t blkaddr = NULL_ADDR;
	int err = 0;

	trace_f2fs_write_begin(inode, pos, len, flags);

	if ((f2fs_is_atomic_file(inode) &&
			!f2fs_available_free_memory(sbi, INMEM_PAGES)) ||
			is_inode_flag_set(inode, FI_ATOMIC_REVOKE_REQUEST)) {
		err = -ENOMEM;
		drop_atomic = true;
		goto fail;
	}

	/*
	 * We should check this at this moment to avoid deadlock on inode page
	 * and #0 page. The locking rule for inline_data conversion should be:
	 * lock_page(page #0) -> lock_page(inode_page)
	 */
	if (index != 0) {
		err = f2fs_convert_inline_inode(inode);
		if (err)
			goto fail;
	}
repeat:
	/*
	 * Do not use grab_cache_page_write_begin() to avoid deadlock due to
	 * wait_for_stable_page. We will wait for that below with our IO
	 * control.
	 */
	page = f2fs_pagecache_get_page(mapping, index,
				FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS);
	if (!page) {
		err = -ENOMEM;
		goto fail;
	}

	*pagep = page;

	err = prepare_write_begin(sbi, page, pos, len,
					&blkaddr, &need_balance);
	if (err)
		goto fail;

	if (need_balance && has_not_enough_free_secs(sbi, 0, 0)) {
		unlock_page(page);
		f2fs_balance_fs(sbi, true);
		lock_page(page);
		if (page->mapping != mapping) {
			/* The page got truncated from under us */
			f2fs_put_page(page, 1);
			goto repeat;
		}
	}

	f2fs_wait_on_page_writeback(page, DATA, false);

	if (len == PAGE_SIZE || PageUptodate(page))
		return 0;

	if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode)) {
		zero_user_segment(page, len, PAGE_SIZE);
		return 0;
	}

	if (blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_SIZE);
		SetPageUptodate(page);
	} else {
		err = f2fs_submit_page_read(inode, page, blkaddr);
		if (err)
			goto fail;

		lock_page(page);
		if (unlikely(page->mapping != mapping)) {
			f2fs_put_page(page, 1);
			goto repeat;
		}
		if (unlikely(!PageUptodate(page))) {
			err = -EIO;
			goto fail;
		}
	}
	return 0;

fail:
	f2fs_put_page(page, 1);
	f2fs_write_failed(mapping, pos + len);
	if (drop_atomic)
		f2fs_drop_inmem_pages_all(sbi, false);
	return err;
}

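/*
 * ->write_end: commit the copied bytes by dirtying the page and, when the
 * write extended the file, pushing i_size forward.
 */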
static int f2fs_write_end(struct file *file,
			struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	struct inode *inode = page->mapping->host;

	trace_f2fs_write_end(inode, pos, len, copied);

	/*
	 * This should come from len == PAGE_SIZE, and we expect copied to be
	 * PAGE_SIZE as well. Otherwise, we treat it as zero copied and let
	 * generic_perform_write() try to copy the data again with copied=0.
	 */
	if (!PageUptodate(page)) {
		if (unlikely(copied != len))
			copied = 0;
		else
			SetPageUptodate(page);
	}
	if (!copied)
		goto unlock_out;

	set_page_dirty(page);

	if (pos + copied > i_size_read(inode))
		f2fs_i_size_write(inode, pos + copied);
unlock_out:
	f2fs_put_page(page, 1);
	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
	return copied;
}

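/*
 * Alignment check for direct IO: returns 0 if the request is aligned to
 * the filesystem block size, a negative errno if it is not even aligned
 * to the device's logical block size, and 1 for the in-between case (the
 * caller then falls back to buffered IO).
 */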
static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
			   loff_t offset)
{
	unsigned i_blkbits = READ_ONCE(inode->i_blkbits);
	unsigned blkbits = i_blkbits;
	unsigned blocksize_mask = (1 << blkbits) - 1;
	unsigned long align = offset | iov_iter_alignment(iter);
	struct block_device *bdev = inode->i_sb->s_bdev;

	if (align & blocksize_mask) {
		if (bdev)
			blkbits = blksize_bits(bdev_logical_block_size(bdev));
		blocksize_mask = (1 << blkbits) - 1;
		if (align & blocksize_mask)
			return -EINVAL;
		return 1;
	}
	return 0;
}

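/*
 * Direct IO entry point.  fi->i_gc_rwsem is held across the transfer so
 * that GC cannot migrate the blocks under IO; out-of-place DIO writes
 * (do_opu) additionally take the READ-side rwsem.
 */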
static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
	struct address_space *mapping = iocb->ki_filp->f_mapping;
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	size_t count = iov_iter_count(iter);
	loff_t offset = iocb->ki_pos;
	int rw = iov_iter_rw(iter);
	int err;
	enum rw_hint hint = iocb->ki_hint;
	int whint_mode = F2FS_OPTION(sbi).whint_mode;
	bool do_opu;

	err = check_direct_IO(inode, iter, offset);
	if (err)
		return err < 0 ? err : 0;

	if (f2fs_force_buffered_io(inode, iocb, iter))
		return 0;

	do_opu = allow_outplace_dio(inode, iocb, iter);

	trace_f2fs_direct_IO_enter(inode, offset, count, rw);

	if (rw == WRITE && whint_mode == WHINT_MODE_OFF)
		iocb->ki_hint = WRITE_LIFE_NOT_SET;

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!down_read_trylock(&fi->i_gc_rwsem[rw])) {
			iocb->ki_hint = hint;
			err = -EAGAIN;
			goto out;
		}
		if (do_opu && !down_read_trylock(&fi->i_gc_rwsem[READ])) {
			up_read(&fi->i_gc_rwsem[rw]);
			iocb->ki_hint = hint;
			err = -EAGAIN;
			goto out;
		}
	} else {
		down_read(&fi->i_gc_rwsem[rw]);
		if (do_opu)
			down_read(&fi->i_gc_rwsem[READ]);
	}

	err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio);

	if (do_opu)
		up_read(&fi->i_gc_rwsem[READ]);

	up_read(&fi->i_gc_rwsem[rw]);

	if (rw == WRITE) {
		if (whint_mode == WHINT_MODE_OFF)
			iocb->ki_hint = hint;
		if (err > 0) {
			f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
									err);
			if (!do_opu)
				set_inode_flag(inode, FI_UPDATE_WRITE);
		} else if (err < 0) {
			f2fs_write_failed(mapping, offset + count);
		}
	}

out:
	trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);

	return err;
}

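/*
 * ->invalidatepage: called on truncate/punch; the dirty counters are
 * rolled back here since the page will no longer reach writepage.
 */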
void f2fs_invalidate_page(struct page *page, unsigned int offset,
							unsigned int length)
{
	struct inode *inode = page->mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);

	if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
		(offset % PAGE_SIZE || length != PAGE_SIZE))
		return;

	if (PageDirty(page)) {
		if (inode->i_ino == F2FS_META_INO(sbi)) {
			dec_page_count(sbi, F2FS_DIRTY_META);
		} else if (inode->i_ino == F2FS_NODE_INO(sbi)) {
			dec_page_count(sbi, F2FS_DIRTY_NODES);
		} else {
			inode_dec_dirty_pages(inode);
			f2fs_remove_dirty_inode(inode);
		}
	}

	/* This is an atomic written page; keep its Private flag */
	if (IS_ATOMIC_WRITTEN_PAGE(page))
		return f2fs_drop_inmem_page(inode, page);

	set_page_private(page, 0);
	ClearPagePrivate(page);
}

int f2fs_release_page(struct page *page, gfp_t wait)
{
	/* If this is a dirty page, keep PagePrivate */
	if (PageDirty(page))
		return 0;

	/* This is an atomic written page; keep its Private flag */
	if (IS_ATOMIC_WRITTEN_PAGE(page))
		return 0;

	set_page_private(page, 0);
	ClearPagePrivate(page);
	return 1;
}

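/*
 * ->set_page_dirty: pages of a file inside an atomic-write transaction
 * are registered in the inmem list instead of being dirtied normally,
 * so they are not written back until the transaction commits.
 */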
static int f2fs_set_data_page_dirty(struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct inode *inode = mapping->host;

	trace_f2fs_set_page_dirty(page, DATA);

	if (!PageUptodate(page))
		SetPageUptodate(page);

	/* don't retain the PG_checked flag, which was set during GC */
	if (is_cold_data(page))
		clear_cold_data(page);

	if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) {
		if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
			f2fs_register_inmem_page(inode, page);
			return 1;
		}
		/*
		 * This page has already been registered, so just
		 * return here.
		 */
		return 0;
	}

	if (!PageDirty(page)) {
		__set_page_dirty_nobuffers(page);
		f2fs_update_dirty_page(inode, page);
		return 1;
	}
	return 0;
}

static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
{
	struct inode *inode = mapping->host;

	if (f2fs_has_inline_data(inode))
		return 0;

	/* make sure allocating whole blocks */
	if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
		filemap_write_and_wait(mapping);

	return generic_block_bmap(mapping, block, get_data_block_bmap);
}

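/*
 * Page migration support.  f2fs uses page_private without holding the
 * extra page reference the migration core expects, hence the extra_count
 * adjustment below.
 */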
#ifdef CONFIG_MIGRATION
#include <linux/migrate.h>

int f2fs_migrate_page(struct address_space *mapping,
		struct page *newpage, struct page *page, enum migrate_mode mode)
{
	int rc, extra_count;
	struct f2fs_inode_info *fi = F2FS_I(mapping->host);
	bool atomic_written = IS_ATOMIC_WRITTEN_PAGE(page);

	BUG_ON(PageWriteback(page));

	/* migrating an atomic written page is safe with the inmem_lock held */
	if (atomic_written) {
		if (mode != MIGRATE_SYNC)
			return -EBUSY;
		if (!mutex_trylock(&fi->inmem_lock))
			return -EAGAIN;
	}

	/*
	 * A reference is expected if PagePrivate is set when moving a mapping;
	 * however, F2FS breaks this rule to maintain its dirty page counts
	 * when truncating pages. So here adjusting 'extra_count' makes it
	 * work.
	 */
	extra_count = (atomic_written ? 1 : 0) - page_has_private(page);
	rc = migrate_page_move_mapping(mapping, newpage,
				page, NULL, mode, extra_count);
	if (rc != MIGRATEPAGE_SUCCESS) {
		if (atomic_written)
			mutex_unlock(&fi->inmem_lock);
		return rc;
	}

	if (atomic_written) {
		struct inmem_pages *cur;
		list_for_each_entry(cur, &fi->inmem_pages, list)
			if (cur->page == page) {
				cur->page = newpage;
				break;
			}
		mutex_unlock(&fi->inmem_lock);
		put_page(page);
		get_page(newpage);
	}

	if (PagePrivate(page))
		SetPagePrivate(newpage);
	set_page_private(newpage, page_private(page));

	if (mode != MIGRATE_SYNC_NO_COPY)
		migrate_page_copy(newpage, page);
	else
		migrate_page_states(newpage, page);

	return MIGRATEPAGE_SUCCESS;
}
#endif

const struct address_space_operations f2fs_dblock_aops = {
	.readpage	= f2fs_read_data_page,
	.readpages	= f2fs_read_data_pages,
	.writepage	= f2fs_write_data_page,
	.writepages	= f2fs_write_data_pages,
	.write_begin	= f2fs_write_begin,
	.write_end	= f2fs_write_end,
	.set_page_dirty	= f2fs_set_data_page_dirty,
	.invalidatepage	= f2fs_invalidate_page,
	.releasepage	= f2fs_release_page,
	.direct_IO	= f2fs_direct_IO,
	.bmap		= f2fs_bmap,
#ifdef CONFIG_MIGRATION
	.migratepage    = f2fs_migrate_page,
#endif
};

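/*
 * Clear PAGECACHE_TAG_DIRTY for a page directly in the mapping's radix
 * tree, for callers that drop a dirty page without going through
 * clear_page_dirty_for_io().
 */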
void f2fs_clear_radix_tree_dirty_tag(struct page *page)
{
	struct address_space *mapping = page_mapping(page);
	unsigned long flags;

	xa_lock_irqsave(&mapping->i_pages, flags);
	radix_tree_tag_clear(&mapping->i_pages, page_index(page),
						PAGECACHE_TAG_DIRTY);
	xa_unlock_irqrestore(&mapping->i_pages, flags);
}

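/*
 * The mempool below pre-allocates post-read contexts so that read
 * completion work (e.g. decryption) can always make progress, even under
 * memory pressure.
 */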
int __init f2fs_init_post_read_processing(void)
{
	bio_post_read_ctx_cache = KMEM_CACHE(bio_post_read_ctx, 0);
	if (!bio_post_read_ctx_cache)
		goto fail;
	bio_post_read_ctx_pool =
		mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS,
					 bio_post_read_ctx_cache);
	if (!bio_post_read_ctx_pool)
		goto fail_free_cache;
	return 0;

fail_free_cache:
	kmem_cache_destroy(bio_post_read_ctx_cache);
fail:
	return -ENOMEM;
}

void __exit f2fs_destroy_post_read_processing(void)
{
	mempool_destroy(bio_post_read_ctx_pool);
	kmem_cache_destroy(bio_post_read_ctx_cache);
}