/*
 * fs/f2fs/data.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/prefetch.h>
#include <linux/uio.h>
#include <linux/cleancache.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "trace.h"
#include <trace/events/f2fs.h>

static void f2fs_read_end_io(struct bio *bio)
31
{
32 33
	struct bio_vec *bvec;
	int i;
34

35
	if (f2fs_bio_encrypted(bio)) {
36
		if (bio->bi_error) {
37
			fscrypt_release_ctx(bio->bi_private);
38
		} else {
39
			fscrypt_decrypt_bio_pages(bio->bi_private, bio);
40 41 42 43
			return;
		}
	}

44 45
	bio_for_each_segment_all(bvec, bio, i) {
		struct page *page = bvec->bv_page;
J
Jaegeuk Kim 已提交
46

47
		if (!bio->bi_error) {
J
Jaegeuk Kim 已提交
48 49 50 51 52 53 54 55 56 57
			SetPageUptodate(page);
		} else {
			ClearPageUptodate(page);
			SetPageError(page);
		}
		unlock_page(page);
	}
	bio_put(bio);
}

static void f2fs_write_end_io(struct bio *bio)
59
{
60
	struct f2fs_sb_info *sbi = bio->bi_private;
61 62
	struct bio_vec *bvec;
	int i;
63

64
	bio_for_each_segment_all(bvec, bio, i) {
65 66
		struct page *page = bvec->bv_page;

67
		fscrypt_pullback_bio_page(&page, true);
68

69
		if (unlikely(bio->bi_error)) {
70
			set_bit(AS_EIO, &page->mapping->flags);
71
			f2fs_stop_checkpoint(sbi);
72 73 74
		}
		end_page_writeback(page);
		dec_page_count(sbi, F2FS_WRITEBACK);
75
	}
76

77
	if (!get_pages(sbi, F2FS_WRITEBACK) && wq_has_sleeper(&sbi->cp_wait))
78 79 80 81 82
		wake_up(&sbi->cp_wait);

	bio_put(bio);
}

/*
 * Low-level block read/write IO operations.
 */

/*
 * Allocate a bio of up to @npages pages that targets block @blk_addr on the
 * filesystem's block device.  Read bios complete through f2fs_read_end_io()
 * with no private data; write bios complete through f2fs_write_end_io()
 * with @sbi as private data.
 */
static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
				int npages, bool is_read)
{
	struct bio *bio;

	bio = f2fs_bio_alloc(npages);

	bio->bi_bdev = sbi->sb->s_bdev;
	bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
	bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
	bio->bi_private = is_read ? NULL : sbi;

	return bio;
}

static void __submit_merged_bio(struct f2fs_bio_info *io)
102
{
J
Jaegeuk Kim 已提交
103
	struct f2fs_io_info *fio = &io->fio;
104 105 106 107

	if (!io->bio)
		return;

108
	if (is_read_io(fio->rw))
109
		trace_f2fs_submit_read_bio(io->sbi->sb, fio, io->bio);
110
	else
111
		trace_f2fs_submit_write_bio(io->sbi->sb, fio, io->bio);
112

113
	submit_bio(fio->rw, io->bio);
114 115 116
	io->bio = NULL;
}

/*
 * Tell whether the bio pending in @io matches the given filter.
 *
 * Returns false when there is no pending bio.  With no filter at all
 * (@inode, @page and @ino all unset) any pending bio matches.  Otherwise
 * each page in the bio is checked: a page without a mapping is resolved
 * through fscrypt_control_page() first, then compared against the owning
 * inode, the page itself, or the node's inode number.
 */
static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
						struct page *page, nid_t ino)
{
	struct bio_vec *bvec;
	struct page *target;
	int i;

	if (!io->bio)
		return false;

	if (!inode && !page && !ino)
		return true;

	bio_for_each_segment_all(bvec, io->bio, i) {

		if (bvec->bv_page->mapping)
			target = bvec->bv_page;
		else
			target = fscrypt_control_page(bvec->bv_page);

		if (inode && inode == target->mapping->host)
			return true;
		if (page && page == target)
			return true;
		if (ino && ino == ino_of_node(target))
			return true;
	}

	return false;
}

/*
 * Locked wrapper around __has_merged_page() for the write bio slot that
 * corresponds to @type; takes io_rwsem for reading during the check.
 */
static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode,
						struct page *page, nid_t ino,
						enum page_type type)
{
	struct f2fs_bio_info *io = &sbi->write_io[PAGE_TYPE_OF_BIO(type)];
	bool found;

	down_read(&io->io_rwsem);
	found = __has_merged_page(io, inode, page, ino);
	up_read(&io->io_rwsem);

	return found;
}

/*
 * Submit the pending merged bio for @type / @rw if it matches the filter
 * (@inode, @page, @ino; all unset means "any pending bio").
 *
 * For @type >= META_FLUSH the pending write is turned into a flush write:
 * WRITE_FLUSH when the NOBARRIER mount option is set, WRITE_FLUSH_FUA
 * otherwise.  Everything runs under io_rwsem held for writing.
 */
static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
				struct inode *inode, struct page *page,
				nid_t ino, enum page_type type, int rw)
{
	enum page_type btype = PAGE_TYPE_OF_BIO(type);
	struct f2fs_bio_info *io;

	io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype];

	down_write(&io->io_rwsem);

	if (!__has_merged_page(io, inode, page, ino))
		goto out;

	/* change META to META_FLUSH in the checkpoint procedure */
	if (type >= META_FLUSH) {
		io->fio.type = META_FLUSH;
		if (test_opt(sbi, NOBARRIER))
			io->fio.rw = WRITE_FLUSH | REQ_META | REQ_PRIO;
		else
			io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
	}
	__submit_merged_bio(io);
out:
	up_write(&io->io_rwsem);
}

/*
 * Unconditionally submit whatever bio is pending for @type / @rw
 * (no inode, page or ino filter is applied).
 */
void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, enum page_type type,
			    int rw)
{
	struct inode *any_inode = NULL;
	struct page *any_page = NULL;

	__f2fs_submit_merged_bio(sbi, any_inode, any_page, 0, type, rw);
}

/*
 * Submit the pending write bio for @type only when it contains a page that
 * matches @inode, @page or @ino.  The match is first probed under the read
 * lock (has_merged_page) and re-checked under the write lock by
 * __f2fs_submit_merged_bio().
 */
void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *sbi,
				struct inode *inode, struct page *page,
				nid_t ino, enum page_type type, int rw)
{
	if (!has_merged_page(sbi, inode, page, ino, type))
		return;

	__f2fs_submit_merged_bio(sbi, inode, page, ino, type, rw);
}

void f2fs_flush_merged_bios(struct f2fs_sb_info *sbi)
{
	f2fs_submit_merged_bio(sbi, DATA, WRITE);
	f2fs_submit_merged_bio(sbi, NODE, WRITE);
	f2fs_submit_merged_bio(sbi, META, WRITE);
}

/*
 * Fill the locked page with data located in the block address.
 * Return unlocked page.
 */
214
int f2fs_submit_page_bio(struct f2fs_io_info *fio)
215 216
{
	struct bio *bio;
217 218
	struct page *page = fio->encrypted_page ?
			fio->encrypted_page : fio->page;
219

220
	trace_f2fs_submit_page_bio(page, fio);
221
	f2fs_trace_ios(fio, 0);
222 223

	/* Allocate a new bio */
224
	bio = __bio_alloc(fio->sbi, fio->new_blkaddr, 1, is_read_io(fio->rw));
225

226
	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
227 228 229 230
		bio_put(bio);
		return -EFAULT;
	}

231
	submit_bio(fio->rw, bio);
232 233 234
	return 0;
}

void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
236
{
237
	struct f2fs_sb_info *sbi = fio->sbi;
J
Jaegeuk Kim 已提交
238
	enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
239
	struct f2fs_bio_info *io;
240
	bool is_read = is_read_io(fio->rw);
241
	struct page *bio_page;
242

243
	io = is_read ? &sbi->read_io : &sbi->write_io[btype];
244

245 246 247
	if (fio->old_blkaddr != NEW_ADDR)
		verify_block_addr(sbi, fio->old_blkaddr);
	verify_block_addr(sbi, fio->new_blkaddr);
248

249
	down_write(&io->io_rwsem);
250

251
	if (!is_read)
252 253
		inc_page_count(sbi, F2FS_WRITEBACK);

254
	if (io->bio && (io->last_block_in_bio != fio->new_blkaddr - 1 ||
J
Jaegeuk Kim 已提交
255 256
						io->fio.rw != fio->rw))
		__submit_merged_bio(io);
257 258
alloc_new:
	if (io->bio == NULL) {
J
Jaegeuk Kim 已提交
259
		int bio_blocks = MAX_BIO_BLOCKS(sbi);
260

261 262
		io->bio = __bio_alloc(sbi, fio->new_blkaddr,
						bio_blocks, is_read);
J
Jaegeuk Kim 已提交
263
		io->fio = *fio;
264 265
	}

266 267
	bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;

268 269
	if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) <
							PAGE_SIZE) {
J
Jaegeuk Kim 已提交
270
		__submit_merged_bio(io);
271 272 273
		goto alloc_new;
	}

274
	io->last_block_in_bio = fio->new_blkaddr;
275
	f2fs_trace_ios(fio, 0);
276

277
	up_write(&io->io_rwsem);
278
	trace_f2fs_submit_page_mbio(fio->page, fio);
279 280
}

/*
282 283 284 285 286
 * Lock ordering for the change of data block address:
 * ->data_page
 *  ->node_page
 *    update block addresses in the node page
 */
287
void set_data_blkaddr(struct dnode_of_data *dn)
288 289 290 291 292 293
{
	struct f2fs_node *rn;
	__le32 *addr_array;
	struct page *node_page = dn->node_page;
	unsigned int ofs_in_node = dn->ofs_in_node;

294
	f2fs_wait_on_page_writeback(node_page, NODE, true);
295

296
	rn = F2FS_NODE(node_page);
297 298 299

	/* Get physical address of data block */
	addr_array = blkaddr_in_node(rn);
J
Jaegeuk Kim 已提交
300
	addr_array[ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
301 302
	if (set_page_dirty(node_page))
		dn->node_changed = true;
303 304
}

/*
 * Record @blkaddr as the new data block address of @dn: remember it in the
 * dnode, write it into the node page, then refresh the extent cache.
 */
void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
{
	dn->data_blkaddr = blkaddr;

	/* persist into the node page before the extent cache sees it */
	set_data_blkaddr(dn);

	f2fs_update_extent_cache(dn);
}

int reserve_new_block(struct dnode_of_data *dn)
{
314
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
315

316
	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
317
		return -EPERM;
318
	if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
319 320
		return -ENOSPC;

321 322
	trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node);

323
	dn->data_blkaddr = NEW_ADDR;
324
	set_data_blkaddr(dn);
325
	mark_inode_dirty(dn->inode);
326 327 328 329
	sync_inode_page(dn);
	return 0;
}

/*
 * Look up (allocating node pages as needed) the dnode for @index and
 * reserve a block there if the slot is still NULL_ADDR.
 *
 * The dnode is released on error, and also on success when the caller did
 * not supply dn->inode_page.  Returns 0 or a negative errno.
 */
int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
{
	/* sampled before get_dnode_of_data() has a chance to fill it in */
	bool need_put = dn->inode_page ? false : true;
	int err;

	err = get_dnode_of_data(dn, index, ALLOC_NODE);
	if (err)
		return err;

	if (dn->data_blkaddr == NULL_ADDR)
		err = reserve_new_block(dn);
	if (err || need_put)
		f2fs_put_dnode(dn);
	return err;
}

/*
 * Resolve the block address for page @index of dn->inode.
 *
 * An extent cache hit fills dn->data_blkaddr directly and returns 0;
 * otherwise the work is delegated to f2fs_reserve_block().
 */
int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
{
	struct extent_info ei;
	struct inode *inode = dn->inode;

	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
		dn->data_blkaddr = ei.blk + index - ei.fofs;
		return 0;
	}

	return f2fs_reserve_block(dn, index);
}

/*
 * Get the page cache page for @index of @inode and start reading it if it
 * is not up to date yet.
 *
 * Encrypted regular files are delegated to read_mapping_page().  For other
 * inodes the block address comes from the extent cache or from a dnode
 * lookup.  Return values:
 *  - an up-to-date, unlocked page (already cached, or zero-filled because
 *    the block address is NEW_ADDR);
 *  - a page with a read bio submitted for it (it is not unlocked here);
 *  - ERR_PTR(-ENOMEM) if no page could be grabbed, ERR_PTR(-ENOENT) for a
 *    hole (NULL_ADDR), or the error from the dnode lookup / bio submission.
 */
struct page *get_read_data_page(struct inode *inode, pgoff_t index,
						int rw, bool for_write)
{
	struct address_space *mapping = inode->i_mapping;
	struct dnode_of_data dn;
	struct page *page;
	struct extent_info ei;
	int err;
	struct f2fs_io_info fio = {
		.sbi = F2FS_I_SB(inode),
		.type = DATA,
		.rw = rw,
		.encrypted_page = NULL,
	};

	if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
		return read_mapping_page(mapping, index, NULL);

	page = f2fs_grab_cache_page(mapping, index, for_write);
	if (!page)
		return ERR_PTR(-ENOMEM);

	/* on a cache hit only dn.data_blkaddr is filled in and used below */
	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
		dn.data_blkaddr = ei.blk + index - ei.fofs;
		goto got_it;
	}

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
	if (err)
		goto put_err;
	f2fs_put_dnode(&dn);

	if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
		err = -ENOENT;
		goto put_err;
	}
got_it:
	if (PageUptodate(page)) {
		unlock_page(page);
		return page;
	}

	/*
	 * A new dentry page is allocated but not able to be written, since its
	 * new inode page couldn't be allocated due to -ENOSPC.
	 * In such the case, its blkaddr can be remained as NEW_ADDR.
	 * see, f2fs_add_link -> get_new_data_page -> init_inode_metadata.
	 */
	if (dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_SIZE);
		SetPageUptodate(page);
		unlock_page(page);
		return page;
	}

	fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;
	fio.page = page;
	err = f2fs_submit_page_bio(&fio);
	if (err)
		goto put_err;
	return page;

put_err:
	f2fs_put_page(page, 1);
	return ERR_PTR(err);
}

/*
 * Return an up-to-date page for @index of @inode.
 *
 * A cached up-to-date page is returned as found.  Otherwise the page is
 * read synchronously via get_read_data_page() and, if still not up to date
 * once it is no longer locked, released with ERR_PTR(-EIO) returned.
 */
struct page *find_data_page(struct inode *inode, pgoff_t index)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;

	page = find_get_page(mapping, index);
	if (page && PageUptodate(page))
		return page;
	f2fs_put_page(page, 0);

	page = get_read_data_page(inode, index, READ_SYNC, false);
	if (IS_ERR(page))
		return page;

	if (PageUptodate(page))
		return page;

	wait_on_page_locked(page);
	if (unlikely(!PageUptodate(page))) {
		f2fs_put_page(page, 0);
		return ERR_PTR(-EIO);
	}
	return page;
}

/*
 * If it tries to access a hole, return an error.
 * Because, the callers, functions in dir.c and GC, should be able to know
 * whether this page exists or not.
 */
457 458
struct page *get_lock_data_page(struct inode *inode, pgoff_t index,
							bool for_write)
459 460 461 462
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
repeat:
463
	page = get_read_data_page(inode, index, READ_SYNC, for_write);
464 465
	if (IS_ERR(page))
		return page;
466

467
	/* wait for read completion */
468
	lock_page(page);
469
	if (unlikely(!PageUptodate(page))) {
470 471
		f2fs_put_page(page, 1);
		return ERR_PTR(-EIO);
472
	}
473
	if (unlikely(page->mapping != mapping)) {
474 475
		f2fs_put_page(page, 1);
		goto repeat;
476 477 478 479
	}
	return page;
}

/*
481 482
 * Caller ensures that this data page is never allocated.
 * A new zero-filled data page is allocated in the page cache.
483
 *
C
Chao Yu 已提交
484 485
 * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
 * f2fs_unlock_op().
486 487
 * Note that, ipage is set only by make_empty_dir, and if any error occur,
 * ipage should be released by this function.
488
 */
489
struct page *get_new_data_page(struct inode *inode,
490
		struct page *ipage, pgoff_t index, bool new_i_size)
491 492 493 494 495
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
	struct dnode_of_data dn;
	int err;
496

497
	page = f2fs_grab_cache_page(mapping, index, true);
498 499 500 501 502 503
	if (!page) {
		/*
		 * before exiting, we should make sure ipage will be released
		 * if any error occur.
		 */
		f2fs_put_page(ipage, 1);
504
		return ERR_PTR(-ENOMEM);
505
	}
506

507
	set_new_dnode(&dn, inode, ipage, NULL, 0);
508
	err = f2fs_reserve_block(&dn, index);
509 510
	if (err) {
		f2fs_put_page(page, 1);
511
		return ERR_PTR(err);
512
	}
513 514
	if (!ipage)
		f2fs_put_dnode(&dn);
515 516

	if (PageUptodate(page))
517
		goto got_it;
518 519

	if (dn.data_blkaddr == NEW_ADDR) {
520
		zero_user_segment(page, 0, PAGE_SIZE);
521
		SetPageUptodate(page);
522
	} else {
523
		f2fs_put_page(page, 1);
524

525 526 527
		/* if ipage exists, blkaddr should be NEW_ADDR */
		f2fs_bug_on(F2FS_I_SB(inode), ipage);
		page = get_lock_data_page(inode, index, true);
528
		if (IS_ERR(page))
529
			return page;
530
	}
531
got_it:
C
Chao Yu 已提交
532
	if (new_i_size && i_size_read(inode) <
533 534
				((loff_t)(index + 1) << PAGE_SHIFT)) {
		i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT));
535 536
		/* Only the directory inode sets new_i_size */
		set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR);
537 538 539 540
	}
	return page;
}

static int __allocate_data_block(struct dnode_of_data *dn)
{
543
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
544 545
	struct f2fs_summary sum;
	struct node_info ni;
546
	int seg = CURSEG_WARM_DATA;
547
	pgoff_t fofs;
548 549 550

	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
		return -EPERM;
551 552 553 554 555

	dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node);
	if (dn->data_blkaddr == NEW_ADDR)
		goto alloc;

556 557 558
	if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
		return -ENOSPC;

559
alloc:
560 561 562
	get_node_info(sbi, dn->nid, &ni);
	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);

563 564 565
	if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page)
		seg = CURSEG_DIRECT_IO;

566 567
	allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
								&sum, seg);
568
	set_data_blkaddr(dn);
569

570
	/* update i_size */
571
	fofs = start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
572
							dn->ofs_in_node;
573
	if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_SHIFT))
C
Chao Yu 已提交
574
		i_size_write(dn->inode,
575
				((loff_t)(fofs + 1) << PAGE_SHIFT));
576 577 578
	return 0;
}

ssize_t f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
580
{
581
	struct inode *inode = file_inode(iocb->ki_filp);
C
Chao Yu 已提交
582
	struct f2fs_map_blocks map;
583
	ssize_t ret = 0;
584

585 586
	map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
	map.m_len = F2FS_BYTES_TO_BLK(iov_iter_count(from));
587
	map.m_next_pgofs = NULL;
588

589 590 591 592 593 594 595 596 597 598
	if (f2fs_encrypted_inode(inode))
		return 0;

	if (iocb->ki_flags & IOCB_DIRECT) {
		ret = f2fs_convert_inline_inode(inode);
		if (ret)
			return ret;
		return f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
	}
	if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA) {
599 600 601 602
		ret = f2fs_convert_inline_inode(inode);
		if (ret)
			return ret;
	}
603 604
	if (!f2fs_has_inline_data(inode))
		return f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO);
605
	return ret;
606 607
}

/*
J
Jaegeuk Kim 已提交
609 610
 * f2fs_map_blocks() now supported readahead/bmap/rw direct_IO with
 * f2fs_map_blocks structure.
C
Chao Yu 已提交
611 612 613 614 615
 * If original data blocks are allocated, then give them to blockdev.
 * Otherwise,
 *     a. preallocate requested block addresses
 *     b. do not use extent cache for better performance
 *     c. give the block addresses to blockdev
616
 */
C
Chao Yu 已提交
617
int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
C
Chao Yu 已提交
618
						int create, int flag)
619
{
J
Jaegeuk Kim 已提交
620
	unsigned int maxblocks = map->m_len;
621
	struct dnode_of_data dn;
622
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
623 624 625
	int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA;
	pgoff_t pgofs, end_offset;
	int err = 0, ofs = 1;
626
	struct extent_info ei;
627
	bool allocated = false;
628
	block_t blkaddr;
629

J
Jaegeuk Kim 已提交
630 631 632 633 634
	map->m_len = 0;
	map->m_flags = 0;

	/* it only supports block size == page size */
	pgofs =	(pgoff_t)map->m_lblk;
635

636
	if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
J
Jaegeuk Kim 已提交
637 638 639
		map->m_pblk = ei.blk + pgofs - ei.fofs;
		map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
		map->m_flags = F2FS_MAP_MAPPED;
640
		goto out;
641
	}
642

C
Chao Yu 已提交
643
next_dnode:
644
	if (create)
645
		f2fs_lock_op(sbi);
646 647 648

	/* When reading holes, we need its node page */
	set_new_dnode(&dn, inode, NULL, NULL, 0);
649
	err = get_dnode_of_data(&dn, pgofs, mode);
650
	if (err) {
C
Chao Yu 已提交
651 652
		if (flag == F2FS_GET_BLOCK_BMAP)
			map->m_pblk = 0;
653
		if (err == -ENOENT) {
654
			err = 0;
655 656 657 658
			if (map->m_next_pgofs)
				*map->m_next_pgofs =
					get_next_page_offset(&dn, pgofs);
		}
659
		goto unlock_out;
660
	}
C
Chao Yu 已提交
661

662
	end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
C
Chao Yu 已提交
663 664 665 666 667

next_block:
	blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);

	if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) {
C
Chao Yu 已提交
668
		if (create) {
669 670
			if (unlikely(f2fs_cp_error(sbi))) {
				err = -EIO;
C
Chao Yu 已提交
671
				goto sync_out;
672
			}
673 674 675 676 677
			if (flag == F2FS_GET_BLOCK_PRE_AIO) {
				if (blkaddr == NULL_ADDR)
					err = reserve_new_block(&dn);
			} else {
				err = __allocate_data_block(&dn);
678 679 680
				if (!err)
					set_inode_flag(F2FS_I(inode),
							FI_APPEND_WRITE);
681
			}
C
Chao Yu 已提交
682
			if (err)
C
Chao Yu 已提交
683
				goto sync_out;
C
Chao Yu 已提交
684 685
			allocated = true;
			map->m_flags = F2FS_MAP_NEW;
C
Chao Yu 已提交
686
			blkaddr = dn.data_blkaddr;
C
Chao Yu 已提交
687
		} else {
C
Chao Yu 已提交
688 689 690 691
			if (flag == F2FS_GET_BLOCK_BMAP) {
				map->m_pblk = 0;
				goto sync_out;
			}
692 693 694 695 696
			if (flag == F2FS_GET_BLOCK_FIEMAP &&
						blkaddr == NULL_ADDR) {
				if (map->m_next_pgofs)
					*map->m_next_pgofs = pgofs + 1;
			}
C
Chao Yu 已提交
697
			if (flag != F2FS_GET_BLOCK_FIEMAP ||
C
Chao Yu 已提交
698
						blkaddr != NEW_ADDR)
C
Chao Yu 已提交
699
				goto sync_out;
C
Chao Yu 已提交
700 701
		}
	}
702

C
Chao Yu 已提交
703 704 705 706 707 708 709 710 711 712
	if (map->m_len == 0) {
		/* preallocated unwritten block should be mapped for fiemap. */
		if (blkaddr == NEW_ADDR)
			map->m_flags |= F2FS_MAP_UNWRITTEN;
		map->m_flags |= F2FS_MAP_MAPPED;

		map->m_pblk = blkaddr;
		map->m_len = 1;
	} else if ((map->m_pblk != NEW_ADDR &&
			blkaddr == (map->m_pblk + ofs)) ||
713
			(map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
714 715
			flag == F2FS_GET_BLOCK_PRE_DIO ||
			flag == F2FS_GET_BLOCK_PRE_AIO) {
C
Chao Yu 已提交
716 717 718 719 720
		ofs++;
		map->m_len++;
	} else {
		goto sync_out;
	}
721 722 723 724

	dn.ofs_in_node++;
	pgofs++;

C
Chao Yu 已提交
725 726 727
	if (map->m_len < maxblocks) {
		if (dn.ofs_in_node < end_offset)
			goto next_block;
728

729 730 731 732
		if (allocated)
			sync_inode_page(&dn);
		f2fs_put_dnode(&dn);

733 734
		if (create) {
			f2fs_unlock_op(sbi);
735
			f2fs_balance_fs(sbi, allocated);
736
		}
737
		allocated = false;
C
Chao Yu 已提交
738
		goto next_dnode;
739
	}
740

741 742 743
sync_out:
	if (allocated)
		sync_inode_page(&dn);
744
	f2fs_put_dnode(&dn);
745
unlock_out:
746
	if (create) {
747
		f2fs_unlock_op(sbi);
748
		f2fs_balance_fs(sbi, allocated);
749
	}
750
out:
J
Jaegeuk Kim 已提交
751
	trace_f2fs_map_blocks(inode, map, err);
752
	return err;
753 754
}

/*
 * buffer_head front end for f2fs_map_blocks().
 *
 * The request is taken from @iblock and bh->b_size (converted to blocks).
 * On success @bh is mapped to the resulting physical block, its
 * F2FS_MAP_FLAGS bits are replaced with the map flags, and b_size is set
 * to the mapped length in bytes.  @next_pgofs is passed through as
 * map.m_next_pgofs and may be NULL.
 */
static int __get_data_block(struct inode *inode, sector_t iblock,
			struct buffer_head *bh, int create, int flag,
			pgoff_t *next_pgofs)
{
	struct f2fs_map_blocks map;
	int ret;

	map.m_lblk = iblock;
	map.m_len = bh->b_size >> inode->i_blkbits;
	map.m_next_pgofs = next_pgofs;

	ret = f2fs_map_blocks(inode, &map, create, flag);
	if (!ret) {
		map_bh(bh, inode->i_sb, map.m_pblk);
		bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
		bh->b_size = map.m_len << inode->i_blkbits;
	}
	return ret;
}

/*
 * Generic block mapping entry: forwards every argument, including the
 * caller-chosen @flag and @next_pgofs, to __get_data_block().
 */
static int get_data_block(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create, int flag,
			pgoff_t *next_pgofs)
{
	return __get_data_block(inode, iblock, bh_result, create,
							flag, next_pgofs);
}

/*
 * get_block-style callback that maps with F2FS_GET_BLOCK_DIO and no
 * next-offset reporting.
 */
static int get_data_block_dio(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create)
{
	return __get_data_block(inode, iblock, bh_result, create,
						F2FS_GET_BLOCK_DIO, NULL);
}

/*
 * get_block-style callback that maps with F2FS_GET_BLOCK_BMAP.
 * Returns -EFBIG when @iblock is at or beyond the superblock's
 * max_file_blocks limit.
 */
static int get_data_block_bmap(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create)
{
	/* Block number less than F2FS MAX BLOCKS */
	if (unlikely(iblock >= F2FS_I_SB(inode)->max_file_blocks))
		return -EFBIG;

	return __get_data_block(inode, iblock, bh_result, create,
						F2FS_GET_BLOCK_BMAP, NULL);
}

/* Convert a byte offset into a block index using the inode's block shift. */
static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
{
	sector_t blk = offset >> inode->i_blkbits;

	return blk;
}

/*
 * Convert a block index into a byte offset using the inode's block shift.
 *
 * The block number is widened to loff_t before shifting: sector_t can be
 * narrower than loff_t on some configurations, and shifting in the narrow
 * type would silently drop the high bits of large offsets.
 */
static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
{
	return ((loff_t)blk << inode->i_blkbits);
}

/*
 * FIEMAP implementation: report the extents of @inode that intersect
 * [@start, @start + @len).
 *
 * Inline data is handled by f2fs_inline_data_fiemap() unless it asks to
 * fall back (-EAGAIN).  Otherwise the range is clamped to i_size and
 * walked under the inode lock.  Each mapped run is reported one iteration
 * late (logical/phys/size/flags hold the previous run), which lets the
 * final run be tagged FIEMAP_EXTENT_LAST once a hole past i_size is seen.
 * Returns 0 or a negative errno (-EINTR on a fatal signal).
 */
int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		u64 start, u64 len)
{
	struct buffer_head map_bh;
	sector_t start_blk, last_blk;
	pgoff_t next_pgofs;
	loff_t isize;
	u64 logical = 0, phys = 0, size = 0;
	u32 flags = 0;
	int ret = 0;

	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
	if (ret)
		return ret;

	if (f2fs_has_inline_data(inode)) {
		ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
		if (ret != -EAGAIN)
			return ret;
	}

	inode_lock(inode);

	isize = i_size_read(inode);
	if (start >= isize)
		goto out;

	if (start + len > isize)
		len = isize - start;

	/* always look at one whole block at least */
	if (logical_to_blk(inode, len) == 0)
		len = blk_to_logical(inode, 1);

	start_blk = logical_to_blk(inode, start);
	last_blk = logical_to_blk(inode, start + len - 1);

next:
	memset(&map_bh, 0, sizeof(struct buffer_head));
	map_bh.b_size = len;

	ret = get_data_block(inode, start_blk, &map_bh, 0,
					F2FS_GET_BLOCK_FIEMAP, &next_pgofs);
	if (ret)
		goto out;

	/* HOLE */
	if (!buffer_mapped(&map_bh)) {
		start_blk = next_pgofs;
		/* Go through holes until we pass the EOF */
		if (blk_to_logical(inode, start_blk) < isize)
			goto prep_next;
		/* Found a hole beyond isize means no more extents.
		 * Note that the premise is that filesystems don't
		 * punch holes beyond isize and keep size unchanged.
		 */
		flags |= FIEMAP_EXTENT_LAST;
	}

	/* emit the run remembered from the previous iteration */
	if (size) {
		if (f2fs_encrypted_inode(inode))
			flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;

		ret = fiemap_fill_next_extent(fieinfo, logical,
				phys, size, flags);
	}

	if (start_blk > last_blk || ret)
		goto out;

	logical = blk_to_logical(inode, start_blk);
	phys = blk_to_logical(inode, map_bh.b_blocknr);
	size = map_bh.b_size;
	flags = 0;
	if (buffer_unwritten(&map_bh))
		flags = FIEMAP_EXTENT_UNWRITTEN;

	start_blk += logical_to_blk(inode, size);

prep_next:
	cond_resched();
	if (fatal_signal_pending(current))
		ret = -EINTR;
	else
		goto next;
out:
	/* a return of 1 from the fill helper is not an error for our caller */
	if (ret == 1)
		ret = 0;

	inode_unlock(inode);
	return ret;
}

/*
 * This function was originally taken from fs/mpage.c, and customized for f2fs.
 * Major change was from block_size == page_size in f2fs by default.
 */
static int f2fs_mpage_readpages(struct address_space *mapping,
			struct list_head *pages, struct page *page,
			unsigned nr_pages)
{
	struct bio *bio = NULL;
	unsigned page_idx;
	sector_t last_block_in_bio = 0;
	struct inode *inode = mapping->host;
	const unsigned blkbits = inode->i_blkbits;
	const unsigned blocksize = 1 << blkbits;
	sector_t block_in_file;
	sector_t last_block;
	sector_t last_block_in_file;
	sector_t block_nr;
	struct block_device *bdev = inode->i_sb->s_bdev;
	struct f2fs_map_blocks map;

	map.m_pblk = 0;
	map.m_lblk = 0;
	map.m_len = 0;
	map.m_flags = 0;
928
	map.m_next_pgofs = NULL;
J
Jaegeuk Kim 已提交
929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965

	for (page_idx = 0; nr_pages; page_idx++, nr_pages--) {

		prefetchw(&page->flags);
		if (pages) {
			page = list_entry(pages->prev, struct page, lru);
			list_del(&page->lru);
			if (add_to_page_cache_lru(page, mapping,
						  page->index, GFP_KERNEL))
				goto next_page;
		}

		block_in_file = (sector_t)page->index;
		last_block = block_in_file + nr_pages;
		last_block_in_file = (i_size_read(inode) + blocksize - 1) >>
								blkbits;
		if (last_block > last_block_in_file)
			last_block = last_block_in_file;

		/*
		 * Map blocks using the previous result first.
		 */
		if ((map.m_flags & F2FS_MAP_MAPPED) &&
				block_in_file > map.m_lblk &&
				block_in_file < (map.m_lblk + map.m_len))
			goto got_it;

		/*
		 * Then do more f2fs_map_blocks() calls until we are
		 * done with this page.
		 */
		map.m_flags = 0;

		if (block_in_file < last_block) {
			map.m_lblk = block_in_file;
			map.m_len = last_block - block_in_file;

966
			if (f2fs_map_blocks(inode, &map, 0,
967
						F2FS_GET_BLOCK_READ))
J
Jaegeuk Kim 已提交
968 969 970 971 972 973 974 975 976 977 978 979
				goto set_error_page;
		}
got_it:
		if ((map.m_flags & F2FS_MAP_MAPPED)) {
			block_nr = map.m_pblk + block_in_file - map.m_lblk;
			SetPageMappedToDisk(page);

			if (!PageUptodate(page) && !cleancache_get_page(page)) {
				SetPageUptodate(page);
				goto confused;
			}
		} else {
980
			zero_user_segment(page, 0, PAGE_SIZE);
J
Jaegeuk Kim 已提交
981 982 983 984 985 986 987 988 989 990 991 992 993 994 995
			SetPageUptodate(page);
			unlock_page(page);
			goto next_page;
		}

		/*
		 * This page will go to BIO.  Do we need to send this
		 * BIO off first?
		 */
		if (bio && (last_block_in_bio != block_nr - 1)) {
submit_and_realloc:
			submit_bio(READ, bio);
			bio = NULL;
		}
		if (bio == NULL) {
996
			struct fscrypt_ctx *ctx = NULL;
997 998 999 1000

			if (f2fs_encrypted_inode(inode) &&
					S_ISREG(inode->i_mode)) {

1001
				ctx = fscrypt_get_ctx(inode, GFP_NOFS);
1002 1003 1004 1005
				if (IS_ERR(ctx))
					goto set_error_page;

				/* wait the page to be moved by cleaning */
1006 1007
				f2fs_wait_on_encrypted_page_writeback(
						F2FS_I_SB(inode), block_nr);
1008 1009
			}

J
Jaegeuk Kim 已提交
1010
			bio = bio_alloc(GFP_KERNEL,
1011
				min_t(int, nr_pages, BIO_MAX_PAGES));
1012 1013
			if (!bio) {
				if (ctx)
1014
					fscrypt_release_ctx(ctx);
J
Jaegeuk Kim 已提交
1015
				goto set_error_page;
1016
			}
J
Jaegeuk Kim 已提交
1017 1018
			bio->bi_bdev = bdev;
			bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(block_nr);
1019
			bio->bi_end_io = f2fs_read_end_io;
1020
			bio->bi_private = ctx;
J
Jaegeuk Kim 已提交
1021 1022 1023 1024 1025 1026 1027 1028 1029
		}

		if (bio_add_page(bio, page, blocksize, 0) < blocksize)
			goto submit_and_realloc;

		last_block_in_bio = block_nr;
		goto next_page;
set_error_page:
		SetPageError(page);
1030
		zero_user_segment(page, 0, PAGE_SIZE);
J
Jaegeuk Kim 已提交
1031 1032 1033 1034 1035 1036 1037 1038 1039 1040
		unlock_page(page);
		goto next_page;
confused:
		if (bio) {
			submit_bio(READ, bio);
			bio = NULL;
		}
		unlock_page(page);
next_page:
		if (pages)
1041
			put_page(page);
J
Jaegeuk Kim 已提交
1042 1043 1044 1045 1046 1047 1048
	}
	BUG_ON(pages && !list_empty(pages));
	if (bio)
		submit_bio(READ, bio);
	return 0;
}

static int f2fs_read_data_page(struct file *file, struct page *page)
{
H
Huajun Li 已提交
1051
	struct inode *inode = page->mapping->host;
1052
	int ret = -EAGAIN;
H
Huajun Li 已提交
1053

1054 1055
	trace_f2fs_readpage(page, DATA);

A
arter97 已提交
1056
	/* If the file has inline data, try to read it directly */
H
Huajun Li 已提交
1057 1058
	if (f2fs_has_inline_data(inode))
		ret = f2fs_read_inline_data(inode, page);
1059
	if (ret == -EAGAIN)
J
Jaegeuk Kim 已提交
1060
		ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1);
H
Huajun Li 已提交
1061
	return ret;
1062 1063 1064 1065 1066 1067
}

static int f2fs_read_data_pages(struct file *file,
			struct address_space *mapping,
			struct list_head *pages, unsigned nr_pages)
{
H
Huajun Li 已提交
1068
	struct inode *inode = file->f_mapping->host;
1069 1070 1071
	struct page *page = list_entry(pages->prev, struct page, lru);

	trace_f2fs_readpages(inode, page, nr_pages);
H
Huajun Li 已提交
1072 1073 1074 1075 1076

	/* If the file has inline data, skip readpages */
	if (f2fs_has_inline_data(inode))
		return 0;

J
Jaegeuk Kim 已提交
1077
	return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages);
1078 1079
}

int do_write_data_page(struct f2fs_io_info *fio)
1081
{
1082
	struct page *page = fio->page;
1083 1084 1085 1086 1087
	struct inode *inode = page->mapping->host;
	struct dnode_of_data dn;
	int err = 0;

	set_new_dnode(&dn, inode, NULL, NULL, 0);
1088
	err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
1089 1090 1091
	if (err)
		return err;

1092
	fio->old_blkaddr = dn.data_blkaddr;
1093 1094

	/* This page is already truncated */
1095
	if (fio->old_blkaddr == NULL_ADDR) {
1096
		ClearPageUptodate(page);
1097
		goto out_writepage;
1098
	}
1099

1100
	if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
1101
		gfp_t gfp_flags = GFP_NOFS;
1102 1103 1104

		/* wait for GCed encrypted page writeback */
		f2fs_wait_on_encrypted_page_writeback(F2FS_I_SB(inode),
1105
							fio->old_blkaddr);
1106 1107 1108
retry_encrypt:
		fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page,
								gfp_flags);
1109 1110
		if (IS_ERR(fio->encrypted_page)) {
			err = PTR_ERR(fio->encrypted_page);
1111 1112 1113 1114 1115 1116 1117 1118
			if (err == -ENOMEM) {
				/* flush pending ios and wait for a while */
				f2fs_flush_merged_bios(F2FS_I_SB(inode));
				congestion_wait(BLK_RW_ASYNC, HZ/50);
				gfp_flags |= __GFP_NOFAIL;
				err = 0;
				goto retry_encrypt;
			}
1119 1120 1121 1122
			goto out_writepage;
		}
	}

1123 1124 1125 1126 1127 1128
	set_page_writeback(page);

	/*
	 * If current allocation needs SSR,
	 * it had better in-place writes for updated data.
	 */
1129
	if (unlikely(fio->old_blkaddr != NEW_ADDR &&
1130
			!is_cold_data(page) &&
1131
			!IS_ATOMIC_WRITTEN_PAGE(page) &&
1132
			need_inplace_update(inode))) {
1133
		rewrite_data_page(fio);
1134
		set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
1135
		trace_f2fs_do_write_data_page(page, IPU);
1136
	} else {
1137
		write_data_page(&dn, fio);
1138
		trace_f2fs_do_write_data_page(page, OPU);
1139
		set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
1140 1141
		if (page->index == 0)
			set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN);
1142 1143 1144 1145 1146 1147 1148 1149 1150 1151
	}
out_writepage:
	f2fs_put_dnode(&dn);
	return err;
}

static int f2fs_write_data_page(struct page *page,
					struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
1152
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1153 1154
	loff_t i_size = i_size_read(inode);
	const pgoff_t end_index = ((unsigned long long) i_size)
1155
							>> PAGE_SHIFT;
H
Huajun Li 已提交
1156
	unsigned offset = 0;
1157
	bool need_balance_fs = false;
1158
	int err = 0;
J
Jaegeuk Kim 已提交
1159
	struct f2fs_io_info fio = {
1160
		.sbi = sbi,
J
Jaegeuk Kim 已提交
1161
		.type = DATA,
C
Chris Fries 已提交
1162
		.rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
1163
		.page = page,
1164
		.encrypted_page = NULL,
J
Jaegeuk Kim 已提交
1165
	};
1166

1167 1168
	trace_f2fs_writepage(page, DATA);

1169
	if (page->index < end_index)
1170
		goto write;
1171 1172 1173 1174 1175

	/*
	 * If the offset is out-of-range of file size,
	 * this page does not have to be written to disk.
	 */
1176
	offset = i_size & (PAGE_SIZE - 1);
1177
	if ((page->index >= end_index + 1) || !offset)
1178
		goto out;
1179

1180
	zero_user_segment(page, offset, PAGE_SIZE);
1181
write:
1182
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
1183
		goto redirty_out;
1184 1185
	if (f2fs_is_drop_cache(inode))
		goto out;
1186 1187 1188 1189
	/* we should not write 0'th page having journal header */
	if (f2fs_is_volatile_file(inode) && (!page->index ||
			(!wbc->for_reclaim &&
			available_free_memory(sbi, BASE_CHECK))))
1190
		goto redirty_out;
1191

1192
	/* Dentry blocks are controlled by checkpoint */
1193
	if (S_ISDIR(inode->i_mode)) {
1194 1195
		if (unlikely(f2fs_cp_error(sbi)))
			goto redirty_out;
1196
		err = do_write_data_page(&fio);
1197 1198
		goto done;
	}
H
Huajun Li 已提交
1199

1200 1201 1202
	/* we should bypass data pages to proceed the kworkder jobs */
	if (unlikely(f2fs_cp_error(sbi))) {
		SetPageError(page);
1203
		goto out;
1204 1205
	}

1206
	if (!wbc->for_reclaim)
1207
		need_balance_fs = true;
1208
	else if (has_not_enough_free_secs(sbi, 0))
1209
		goto redirty_out;
1210

1211
	err = -EAGAIN;
1212
	f2fs_lock_op(sbi);
1213 1214 1215
	if (f2fs_has_inline_data(inode))
		err = f2fs_write_inline_data(inode, page);
	if (err == -EAGAIN)
1216
		err = do_write_data_page(&fio);
1217 1218 1219 1220
	f2fs_unlock_op(sbi);
done:
	if (err && err != -ENOENT)
		goto redirty_out;
1221 1222

	clear_cold_data(page);
1223
out:
1224
	inode_dec_dirty_pages(inode);
1225 1226
	if (err)
		ClearPageUptodate(page);
1227 1228 1229 1230 1231 1232

	if (wbc->for_reclaim) {
		f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, DATA, WRITE);
		remove_dirty_inode(inode);
	}

1233
	unlock_page(page);
J
Jaegeuk Kim 已提交
1234
	f2fs_balance_fs(sbi, need_balance_fs);
1235 1236

	if (unlikely(f2fs_cp_error(sbi)))
1237
		f2fs_submit_merged_bio(sbi, DATA, WRITE);
1238

1239 1240 1241
	return 0;

redirty_out:
1242
	redirty_page_for_writepage(wbc, page);
1243
	return AOP_WRITEPAGE_ACTIVATE;
1244 1245
}

1246 1247 1248 1249 1250 1251 1252 1253 1254
/*
 * writepage_t adapter: @data carries the address_space.  Calls that
 * mapping's ->writepage and records the result on the mapping through
 * mapping_set_error() before returning it.
 */
static int __f2fs_writepage(struct page *page, struct writeback_control *wbc,
			void *data)
{
	struct address_space *as = data;
	int err;

	err = as->a_ops->writepage(page, wbc);
	mapping_set_error(as, err);

	return err;
}

C
Chao Yu 已提交
1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287
/*
 * This function was copied from write_cache_pages from mm/page-writeback.c.
 * The major change is that the range is walked twice, selected by 'step':
 * a page is skipped in a pass when step == is_cold_data(page), so cold data
 * pages and warm/hot data pages are written in separate passes.
 *
 * Returns 0, or the first error returned by @writepage other than
 * AOP_WRITEPAGE_ACTIVATE (which is turned into 0 after unlocking the page).
 */
static int f2fs_write_cache_pages(struct address_space *mapping,
			struct writeback_control *wbc, writepage_t writepage,
			void *data)
{
	int ret = 0;
	int done = 0;
	struct pagevec pvec;
	int nr_pages;
	pgoff_t uninitialized_var(writeback_index);
	pgoff_t index;
	pgoff_t end;		/* Inclusive */
	pgoff_t done_index;
	int cycled;
	int range_whole = 0;
	int tag;
	int step = 0;

	pagevec_init(&pvec, 0);
next:
	if (wbc->range_cyclic) {
		writeback_index = mapping->writeback_index; /* prev offset */
		index = writeback_index;
		cycled = (index == 0) ? 1 : 0;
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_SHIFT;
		end = wbc->range_end >> PAGE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
		cycled = 1; /* ignore range_cyclic tests */
	}

	tag = (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) ?
			PAGECACHE_TAG_TOWRITE : PAGECACHE_TAG_DIRTY;
retry:
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag_pages_for_writeback(mapping, index, end);
	done_index = index;

	while (!done && (index <= end)) {
		int i;

		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
			      min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1);
		if (nr_pages == 0)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			if (page->index > end) {
				done = 1;
				break;
			}

			done_index = page->index;

			lock_page(page);

			/* page no longer belongs to this mapping */
			if (unlikely(page->mapping != mapping))
				goto continue_unlock;

			/* someone wrote it for us */
			if (!PageDirty(page))
				goto continue_unlock;

			/* not this pass's temperature class */
			if (step == is_cold_data(page))
				goto continue_unlock;

			if (PageWriteback(page)) {
				if (wbc->sync_mode == WB_SYNC_NONE)
					goto continue_unlock;
				f2fs_wait_on_page_writeback(page,
							DATA, true);
			}

			BUG_ON(PageWriteback(page));
			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			ret = (*writepage)(page, wbc, data);
			if (unlikely(ret)) {
				if (ret != AOP_WRITEPAGE_ACTIVATE) {
					done_index = page->index + 1;
					done = 1;
					break;
				}
				unlock_page(page);
				ret = 0;
			}

			if (--wbc->nr_to_write <= 0 &&
			    wbc->sync_mode == WB_SYNC_NONE) {
				done = 1;
				break;
			}
			continue;

continue_unlock:
			unlock_page(page);
		}
		pagevec_release(&pvec);
		cond_resched();
	}

	/* run the second pass for the other temperature class */
	if (step < 1) {
		step++;
		goto next;
	}

	if (!cycled && !done) {
		/* wrap around: cover [0, writeback_index - 1] as well */
		cycled = 1;
		index = 0;
		end = writeback_index - 1;
		goto retry;
	}
	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = done_index;

	return ret;
}

1387
static int f2fs_write_data_pages(struct address_space *mapping,
1388 1389 1390
			    struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
1391
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1392
	bool locked = false;
1393
	int ret;
1394
	long diff;
1395

P
P J P 已提交
1396 1397 1398 1399
	/* deal with chardevs and other special file */
	if (!mapping->a_ops->writepage)
		return 0;

1400 1401 1402 1403
	/* skip writing if there is no dirty page in this inode */
	if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE)
		return 0;

1404 1405 1406 1407 1408
	if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
			get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
			available_free_memory(sbi, DIRTY_DENTS))
		goto skip_write;

C
Chao Yu 已提交
1409 1410 1411 1412
	/* skip writing during file defragment */
	if (is_inode_flag_set(F2FS_I(inode), FI_DO_DEFRAG))
		goto skip_write;

1413 1414 1415 1416
	/* during POR, we don't need to trigger writepage at all. */
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto skip_write;

Y
Yunlei He 已提交
1417 1418
	trace_f2fs_writepages(mapping->host, wbc, DATA);

1419
	diff = nr_pages_to_write(sbi, DATA, wbc);
1420

1421
	if (!S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_ALL) {
1422 1423 1424
		mutex_lock(&sbi->writepages);
		locked = true;
	}
C
Chao Yu 已提交
1425
	ret = f2fs_write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
1426
	f2fs_submit_merged_bio_cond(sbi, inode, NULL, 0, DATA, WRITE);
1427 1428
	if (locked)
		mutex_unlock(&sbi->writepages);
J
Jaegeuk Kim 已提交
1429

1430
	remove_dirty_inode(inode);
1431

1432
	wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
1433
	return ret;
1434 1435

skip_write:
1436
	wbc->pages_skipped += get_dirty_pages(inode);
Y
Yunlei He 已提交
1437
	trace_f2fs_writepages(mapping->host, wbc, DATA);
1438
	return 0;
1439 1440
}

1441 1442 1443
/*
 * Undo a failed write that would have ended at offset @to: if @to lies
 * beyond the current i_size, truncate the page cache and the blocks back
 * to i_size.
 */
static void f2fs_write_failed(struct address_space *mapping, loff_t to)
{
	struct inode *inode = mapping->host;
	loff_t size = i_size_read(inode);

	if (to <= size)
		return;

	truncate_pagecache(inode, size);
	truncate_blocks(inode, size, true);
}

1452 1453 1454 1455 1456 1457 1458 1459
/*
 * Resolve the block address backing @page before a buffered write.
 *
 * On success *blk_addr receives dn.data_blkaddr and *node_changed receives
 * dn.node_changed.  Neither is written when the function returns early for
 * a full-page write to a non-inline, non-encrypted inode, when the inode
 * page cannot be read, or when f2fs_convert_inline_page() fails.
 *
 * Returns 0 or a negative errno.
 */
static int prepare_write_begin(struct f2fs_sb_info *sbi,
			struct page *page, loff_t pos, unsigned len,
			block_t *blk_addr, bool *node_changed)
{
	struct inode *inode = page->mapping->host;
	pgoff_t index = page->index;
	struct dnode_of_data dn;
	struct extent_info ei;
	struct page *ipage;
	bool locked = false;
	int err = 0;

	/*
	 * we already allocated all the blocks, so we don't need to get
	 * the block addresses when there is no need to fill the page.
	 */
	if (len == PAGE_SIZE && !f2fs_has_inline_data(inode) &&
					!f2fs_encrypted_inode(inode))
		return 0;

	/* inline data, or a page at/after i_size: take f2fs_lock_op() first */
	if (f2fs_has_inline_data(inode) ||
			(pos & PAGE_MASK) >= i_size_read(inode)) {
		f2fs_lock_op(sbi);
		locked = true;
	}

restart:
	/* check inline_data */
	ipage = get_node_page(sbi, inode->i_ino);
	if (IS_ERR(ipage)) {
		err = PTR_ERR(ipage);
		goto unlock_out;
	}

	set_new_dnode(&dn, inode, ipage, ipage, 0);

	if (f2fs_has_inline_data(inode)) {
		if (pos + len <= MAX_INLINE_DATA) {
			/* the write still fits inline */
			read_inline_data(page, ipage);
			set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
			set_inline_node(ipage);
		} else {
			err = f2fs_convert_inline_page(&dn, page);
			if (err)
				goto out;
			if (dn.data_blkaddr == NULL_ADDR)
				err = f2fs_get_block(&dn, index);
		}
	} else if (locked) {
		err = f2fs_get_block(&dn, index);
	} else if (f2fs_lookup_extent_cache(inode, index, &ei)) {
		dn.data_blkaddr = ei.blk + index - ei.fofs;
	} else {
		/* hole case */
		err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
		if (err || dn.data_blkaddr == NULL_ADDR) {
			/* retry from the top, this time holding the lock */
			f2fs_put_dnode(&dn);
			f2fs_lock_op(sbi);
			locked = true;
			goto restart;
		}
	}

	/* convert_inline_page can make node_changed */
	*blk_addr = dn.data_blkaddr;
	*node_changed = dn.node_changed;
out:
	f2fs_put_dnode(&dn);
unlock_out:
	if (locked)
		f2fs_unlock_op(sbi);
	return err;
}

1527 1528 1529 1530 1531
/*
 * ->write_begin: grab the page cache page covering @pos and prepare it for
 * a write of @len bytes.
 *
 * On success the page is stored in *pagep and 0 is returned.  On failure
 * the page (if any) is released through f2fs_put_page(), f2fs_write_failed()
 * is called for pos + len, and a negative errno is returned.
 */
static int f2fs_write_begin(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned flags,
		struct page **pagep, void **fsdata)
{
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct page *page = NULL;
	pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT;
	bool need_balance = false;
	block_t blkaddr = NULL_ADDR;
	int err = 0;

	trace_f2fs_write_begin(inode, pos, len, flags);

	/*
	 * We should check this at this moment to avoid deadlock on inode page
	 * and #0 page. The locking rule for inline_data conversion should be:
	 * lock_page(page #0) -> lock_page(inode_page)
	 */
	if (index != 0) {
		err = f2fs_convert_inline_inode(inode);
		if (err)
			goto fail;
	}

repeat:
	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page) {
		err = -ENOMEM;
		goto fail;
	}

	*pagep = page;

	err = prepare_write_begin(sbi, page, pos, len,
					&blkaddr, &need_balance);
	if (err)
		goto fail;

	if (need_balance && has_not_enough_free_secs(sbi, 0)) {
		/* drop the page lock around f2fs_balance_fs() */
		unlock_page(page);
		f2fs_balance_fs(sbi, true);
		lock_page(page);
		if (page->mapping != mapping) {
			/* The page got truncated from under us */
			f2fs_put_page(page, 1);
			goto repeat;
		}
	}

	f2fs_wait_on_page_writeback(page, DATA, false);

	/* wait for GCed encrypted page writeback */
	if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
		f2fs_wait_on_encrypted_page_writeback(sbi, blkaddr);

	/* a full-page write needs no existing contents */
	if (len == PAGE_SIZE)
		goto out_update;
	if (PageUptodate(page))
		goto out_clear;

	if ((pos & PAGE_MASK) >= i_size_read(inode)) {
		unsigned from = pos & (PAGE_SIZE - 1);
		unsigned to = from + len;

		/* Reading beyond i_size is simple: memset to zero */
		zero_user_segments(page, 0, from, to, PAGE_SIZE);
		goto out_update;
	}

	if (blkaddr != NEW_ADDR) {
		struct f2fs_io_info fio = {
			.sbi = sbi,
			.type = DATA,
			.rw = READ_SYNC,
			.old_blkaddr = blkaddr,
			.new_blkaddr = blkaddr,
			.page = page,
			.encrypted_page = NULL,
		};

		err = f2fs_submit_page_bio(&fio);
		if (err)
			goto fail;

		lock_page(page);
		if (unlikely(!PageUptodate(page))) {
			err = -EIO;
			goto fail;
		}
		if (unlikely(page->mapping != mapping)) {
			f2fs_put_page(page, 1);
			goto repeat;
		}

		/* avoid symlink page */
		if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
			err = fscrypt_decrypt_page(page);
			if (err)
				goto fail;
		}
	} else {
		zero_user_segment(page, 0, PAGE_SIZE);
	}

out_update:
	SetPageUptodate(page);
out_clear:
	clear_cold_data(page);
	return 0;

fail:
	f2fs_put_page(page, 1);
	f2fs_write_failed(mapping, pos + len);
	return err;
}

1641 1642 1643 1644 1645 1646 1647
/*
 * ->write_end: finish a buffered write into @page.
 *
 * Marks the page dirty, grows i_size when the copied range ends beyond it,
 * releases the page through f2fs_put_page() and updates REQ_TIME.
 * Returns @copied.
 */
static int f2fs_write_end(struct file *file,
			struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	struct inode *inode = page->mapping->host;
	loff_t end_pos = pos + copied;

	trace_f2fs_write_end(inode, pos, len, copied);

	set_page_dirty(page);

	if (end_pos > i_size_read(inode)) {
		i_size_write(inode, end_pos);
		mark_inode_dirty(inode);
	}

	f2fs_put_page(page, 1);
	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);

	return copied;
}

1662 1663
/*
 * Validate a direct I/O request: both the file offset and the alignment
 * reported by iov_iter_alignment() must be multiples of the superblock's
 * block size.  Returns 0 if so, -EINVAL otherwise.
 */
static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
			   loff_t offset)
{
	unsigned mask = inode->i_sb->s_blocksize - 1;

	if ((offset & mask) || (iov_iter_alignment(iter) & mask))
		return -EINVAL;

	return 0;
}

1676 1677
/*
 * ->direct_IO entry point.
 *
 * Returns the error from check_direct_IO() for a misaligned request, 0 for
 * an encrypted regular file (no direct I/O is attempted), and otherwise
 * the result of blockdev_direct_IO().
 */
static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
			      loff_t offset)
{
	struct address_space *mapping = iocb->ki_filp->f_mapping;
	struct inode *inode = mapping->host;
	size_t count = iov_iter_count(iter);
	int rw;
	int ret;

	ret = check_direct_IO(inode, iter, offset);
	if (ret)
		return ret;

	if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
		return 0;

	rw = iov_iter_rw(iter);

	trace_f2fs_direct_IO_enter(inode, offset, count, rw);

	ret = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block_dio);

	if (rw == WRITE) {
		if (ret > 0)
			set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
		else if (ret < 0)
			/* failed write: trim anything left past i_size */
			f2fs_write_failed(mapping, offset + count);
	}

	trace_f2fs_direct_IO_exit(inode, offset, count, rw, ret);

	return ret;
}

1706 1707
void f2fs_invalidate_page(struct page *page, unsigned int offset,
							unsigned int length)
1708 1709
{
	struct inode *inode = page->mapping->host;
1710
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1711

1712
	if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
1713
		(offset % PAGE_SIZE || length != PAGE_SIZE))
1714 1715
		return;

1716 1717 1718 1719 1720 1721 1722 1723
	if (PageDirty(page)) {
		if (inode->i_ino == F2FS_META_INO(sbi))
			dec_page_count(sbi, F2FS_DIRTY_META);
		else if (inode->i_ino == F2FS_NODE_INO(sbi))
			dec_page_count(sbi, F2FS_DIRTY_NODES);
		else
			inode_dec_dirty_pages(inode);
	}
C
Chao Yu 已提交
1724 1725 1726 1727 1728

	/* This is atomic written page, keep Private */
	if (IS_ATOMIC_WRITTEN_PAGE(page))
		return;

1729
	set_page_private(page, 0);
1730 1731 1732
	ClearPagePrivate(page);
}

1733
/*
 * ->releasepage: returns 0, leaving PagePrivate alone, for a dirty page or
 * an atomic-written page; otherwise clears the page's private data and
 * returns 1.
 */
int f2fs_release_page(struct page *page, gfp_t wait)
{
	/* dirty pages and atomic written pages keep their Private state */
	if (PageDirty(page) || IS_ATOMIC_WRITTEN_PAGE(page))
		return 0;

	set_page_private(page, 0);
	ClearPagePrivate(page);

	return 1;
}

static int f2fs_set_data_page_dirty(struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct inode *inode = mapping->host;

1753 1754
	trace_f2fs_set_page_dirty(page, DATA);

1755
	SetPageUptodate(page);
1756

1757
	if (f2fs_is_atomic_file(inode)) {
C
Chao Yu 已提交
1758 1759 1760 1761 1762 1763 1764 1765 1766
		if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
			register_inmem_page(inode, page);
			return 1;
		}
		/*
		 * Previously, this page has been registered, we just
		 * return here.
		 */
		return 0;
1767 1768
	}

1769 1770
	if (!PageDirty(page)) {
		__set_page_dirty_nobuffers(page);
1771
		update_dirty_page(inode, page);
1772 1773 1774 1775 1776
		return 1;
	}
	return 0;
}

J
Jaegeuk Kim 已提交
1777 1778
/*
 * ->bmap: returns 0 for an inode with inline data; otherwise flushes dirty
 * pages when the mapping has any tagged dirty, then maps @block through
 * generic_block_bmap() with get_data_block_bmap.
 */
static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
{
	struct inode *inode = mapping->host;

	if (f2fs_has_inline_data(inode))
		return 0;

	/* make sure allocating whole blocks */
	if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
		filemap_write_and_wait(mapping);

	return generic_block_bmap(mapping, block, get_data_block_bmap);
}

1791 1792 1793 1794 1795 1796
const struct address_space_operations f2fs_dblock_aops = {
	.readpage	= f2fs_read_data_page,
	.readpages	= f2fs_read_data_pages,
	.writepage	= f2fs_write_data_page,
	.writepages	= f2fs_write_data_pages,
	.write_begin	= f2fs_write_begin,
1797
	.write_end	= f2fs_write_end,
1798
	.set_page_dirty	= f2fs_set_data_page_dirty,
1799 1800
	.invalidatepage	= f2fs_invalidate_page,
	.releasepage	= f2fs_release_page,
1801
	.direct_IO	= f2fs_direct_IO,
J
Jaegeuk Kim 已提交
1802
	.bmap		= f2fs_bmap,
1803
};