/*
 * fs/f2fs/data.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
15
#include <linux/aio.h>
16 17 18 19
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
20
#include <linux/prefetch.h>
21 22 23 24

#include "f2fs.h"
#include "node.h"
#include "segment.h"
25
#include <trace/events/f2fs.h>
26

27 28
static void f2fs_read_end_io(struct bio *bio, int err)
{
29 30
	struct bio_vec *bvec;
	int i;
31

32
	bio_for_each_segment_all(bvec, bio, i) {
33 34
		struct page *page = bvec->bv_page;

35 36 37
		if (!err) {
			SetPageUptodate(page);
		} else {
38 39 40 41
			ClearPageUptodate(page);
			SetPageError(page);
		}
		unlock_page(page);
42
	}
43 44 45 46 47
	bio_put(bio);
}

static void f2fs_write_end_io(struct bio *bio, int err)
{
48
	struct f2fs_sb_info *sbi = bio->bi_private;
49 50
	struct bio_vec *bvec;
	int i;
51

52
	bio_for_each_segment_all(bvec, bio, i) {
53 54
		struct page *page = bvec->bv_page;

55
		if (unlikely(err)) {
56
			set_page_dirty(page);
57
			set_bit(AS_EIO, &page->mapping->flags);
58
			f2fs_stop_checkpoint(sbi);
59 60 61
		}
		end_page_writeback(page);
		dec_page_count(sbi, F2FS_WRITEBACK);
62
	}
63

64 65 66 67
	if (sbi->wait_io) {
		complete(sbi->wait_io);
		sbi->wait_io = NULL;
	}
68 69 70 71 72 73 74 75

	if (!get_pages(sbi, F2FS_WRITEBACK) &&
			!list_empty(&sbi->cp_wait.task_list))
		wake_up(&sbi->cp_wait);

	bio_put(bio);
}

76 77 78 79 80 81 82 83 84 85 86 87
/*
 * Low-level block read/write IO operations.
 *
 * __bio_alloc() builds a bio with room for @npages pages that targets block
 * @blk_addr on the filesystem's block device.  The completion handler is the
 * read or the write end_io routine depending on @is_read, and @sbi is stored
 * as the bio's private data.
 */
static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
				int npages, bool is_read)
{
	/* No failure on bio allocation */
	struct bio *bio = bio_alloc(GFP_NOIO, npages);

	bio->bi_bdev = sbi->sb->s_bdev;
	bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr);
	if (is_read)
		bio->bi_end_io = f2fs_read_end_io;
	else
		bio->bi_end_io = f2fs_write_end_io;
	bio->bi_private = sbi;

	return bio;
}

J
Jaegeuk Kim 已提交
95
static void __submit_merged_bio(struct f2fs_bio_info *io)
96
{
J
Jaegeuk Kim 已提交
97 98
	struct f2fs_io_info *fio = &io->fio;
	int rw;
99 100 101 102

	if (!io->bio)
		return;

103
	rw = fio->rw;
104 105

	if (is_read_io(rw)) {
106 107
		trace_f2fs_submit_read_bio(io->sbi->sb, rw,
						fio->type, io->bio);
J
Jaegeuk Kim 已提交
108
		submit_bio(rw, io->bio);
109
	} else {
110 111 112 113 114 115 116 117
		trace_f2fs_submit_write_bio(io->sbi->sb, rw,
						fio->type, io->bio);
		/*
		 * META_FLUSH is only from the checkpoint procedure, and we
		 * should wait this metadata bio for FS consistency.
		 */
		if (fio->type == META_FLUSH) {
			DECLARE_COMPLETION_ONSTACK(wait);
118
			io->sbi->wait_io = &wait;
119 120 121 122 123
			submit_bio(rw, io->bio);
			wait_for_completion(&wait);
		} else {
			submit_bio(rw, io->bio);
		}
124
	}
125

126 127 128 129
	io->bio = NULL;
}

void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
J
Jaegeuk Kim 已提交
130
				enum page_type type, int rw)
131 132 133 134 135 136
{
	enum page_type btype = PAGE_TYPE_OF_BIO(type);
	struct f2fs_bio_info *io;

	io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype];

137
	down_write(&io->io_rwsem);
J
Jaegeuk Kim 已提交
138 139 140 141

	/* change META to META_FLUSH in the checkpoint procedure */
	if (type >= META_FLUSH) {
		io->fio.type = META_FLUSH;
J
Jaegeuk Kim 已提交
142 143 144 145
		if (test_opt(sbi, NOBARRIER))
			io->fio.rw = WRITE_FLUSH | REQ_META | REQ_PRIO;
		else
			io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
J
Jaegeuk Kim 已提交
146 147
	}
	__submit_merged_bio(io);
148
	up_write(&io->io_rwsem);
149 150 151 152 153 154 155 156 157 158 159 160 161 162
}

/*
 * Issue a dedicated single-page bio for the locked @page against the block
 * at @blk_addr, in the direction given by @rw.
 *
 * Returns 0 once the bio has been submitted.  If the page cannot be added to
 * the bio, the bio is dropped, the page is unlocked and released, and
 * -EFAULT is returned.
 */
int f2fs_submit_page_bio(struct f2fs_sb_info *sbi, struct page *page,
					block_t blk_addr, int rw)
{
	struct bio *bio;

	trace_f2fs_submit_page_bio(page, blk_addr, rw);

	/* one bio carrying exactly this page */
	bio = __bio_alloc(sbi, blk_addr, 1, is_read_io(rw));

	if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) >= PAGE_CACHE_SIZE) {
		submit_bio(rw, bio);
		return 0;
	}

	bio_put(bio);
	f2fs_put_page(page, 1);
	return -EFAULT;
}

void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page,
J
Jaegeuk Kim 已提交
176
			block_t blk_addr, struct f2fs_io_info *fio)
177
{
J
Jaegeuk Kim 已提交
178
	enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
179
	struct f2fs_bio_info *io;
180
	bool is_read = is_read_io(fio->rw);
181

182
	io = is_read ? &sbi->read_io : &sbi->write_io[btype];
183 184 185

	verify_block_addr(sbi, blk_addr);

186
	down_write(&io->io_rwsem);
187

188
	if (!is_read)
189 190
		inc_page_count(sbi, F2FS_WRITEBACK);

191
	if (io->bio && (io->last_block_in_bio != blk_addr - 1 ||
J
Jaegeuk Kim 已提交
192 193
						io->fio.rw != fio->rw))
		__submit_merged_bio(io);
194 195
alloc_new:
	if (io->bio == NULL) {
196 197 198
		int bio_blocks = MAX_BIO_BLOCKS(max_hw_blocks(sbi));

		io->bio = __bio_alloc(sbi, blk_addr, bio_blocks, is_read);
J
Jaegeuk Kim 已提交
199
		io->fio = *fio;
200 201 202 203
	}

	if (bio_add_page(io->bio, page, PAGE_CACHE_SIZE, 0) <
							PAGE_CACHE_SIZE) {
J
Jaegeuk Kim 已提交
204
		__submit_merged_bio(io);
205 206 207 208 209
		goto alloc_new;
	}

	io->last_block_in_bio = blk_addr;

210
	up_write(&io->io_rwsem);
J
Jaegeuk Kim 已提交
211
	trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, blk_addr);
212 213
}

J
Jaegeuk Kim 已提交
214
/*
 * Lock ordering for the change of data block address:
 * ->data_page
 *  ->node_page
 *    update block addresses in the node page
 *
 * Stores @new_addr (little-endian) into slot dn->ofs_in_node of the address
 * array held by dn->node_page, after waiting for any writeback of that node
 * page, and marks the node page dirty.
 */
static void __set_data_blkaddr(struct dnode_of_data *dn, block_t new_addr)
{
	struct page *node_page = dn->node_page;
	unsigned int slot = dn->ofs_in_node;
	__le32 *addr_array;

	f2fs_wait_on_page_writeback(node_page, NODE);

	/* Get physical address of data block */
	addr_array = blkaddr_in_node(F2FS_NODE(node_page));
	addr_array[slot] = cpu_to_le32(new_addr);
	set_page_dirty(node_page);
}

int reserve_new_block(struct dnode_of_data *dn)
{
239
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
240

241
	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
242
		return -EPERM;
243
	if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
244 245
		return -ENOSPC;

246 247
	trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node);

248 249
	__set_data_blkaddr(dn, NEW_ADDR);
	dn->data_blkaddr = NEW_ADDR;
250
	mark_inode_dirty(dn->inode);
251 252 253 254
	sync_inode_page(dn);
	return 0;
}

255 256 257 258 259
/*
 * Look up (allocating nodes as needed) the dnode for page @index and reserve
 * a block there if none is assigned yet.
 *
 * When the caller did not supply dn->inode_page, the dnode is released
 * before returning; it is also released whenever the reservation fails.
 * Returns 0 or a negative errno.
 */
int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
{
	bool need_put = !dn->inode_page;
	int err;

	/* if inode_page exists, index should be zero */
	f2fs_bug_on(F2FS_I_SB(dn->inode), !need_put && index);

	err = get_dnode_of_data(dn, index, ALLOC_NODE);
	if (err)
		return err;

	if (dn->data_blkaddr == NULL_ADDR)
		err = reserve_new_block(dn);

	if (err || need_put)
		f2fs_put_dnode(dn);

	return err;
}

274 275 276 277 278 279 280
/*
 * Try to resolve file page offset @pgofs from the inode's single cached
 * extent.
 *
 * On a hit, @bh_result is mapped to the matching block and its b_size is set
 * to the number of bytes left in the extent from @pgofs on (capped at
 * UINT_MAX), and 1 is returned.  Returns 0 when the extent cache is disabled
 * for the inode, empty, or does not cover @pgofs.
 */
static int check_extent_cache(struct inode *inode, pgoff_t pgofs,
					struct buffer_head *bh_result)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);
	pgoff_t start_fofs, end_fofs;
	block_t start_blkaddr;
	unsigned int blkbits;
	size_t count;
	int hit = 0;

	if (is_inode_flag_set(fi, FI_NO_EXTENT))
		return 0;

	read_lock(&fi->ext.ext_lock);
	if (fi->ext.len == 0)
		goto unlock;

	stat_inc_total_hit(inode->i_sb);

	start_fofs = fi->ext.fofs;
	end_fofs = fi->ext.fofs + fi->ext.len - 1;
	start_blkaddr = fi->ext.blk_addr;

	if (pgofs < start_fofs || pgofs > end_fofs)
		goto unlock;

	blkbits = inode->i_sb->s_blocksize_bits;

	clear_buffer_new(bh_result);
	map_bh(bh_result, inode->i_sb,
			start_blkaddr + pgofs - start_fofs);

	/* report the rest of the extent, without overflowing b_size */
	count = end_fofs - pgofs + 1;
	if (count < (UINT_MAX >> blkbits))
		bh_result->b_size = (count << blkbits);
	else
		bh_result->b_size = UINT_MAX;

	stat_inc_read_hit(inode->i_sb);
	hit = 1;
unlock:
	read_unlock(&fi->ext.ext_lock);
	return hit;
}

/*
 * Record @blk_addr as the new address of the data block described by @dn and
 * keep the inode's single cached extent consistent with it.
 *
 * The node page is always updated.  Unless FI_NO_EXTENT is set, the cached
 * extent is then started, grown at the front or back, or split around the
 * updated offset; an extent that ends up shorter than F2FS_MIN_EXTENT_LEN is
 * dropped and FI_NO_EXTENT is set.  The inode page is synced whenever the
 * cached extent was touched.
 */
void update_extent_cache(block_t blk_addr, struct dnode_of_data *dn)
{
	struct f2fs_inode_info *fi = F2FS_I(dn->inode);
	pgoff_t fofs, start_fofs, end_fofs;
	block_t start_blkaddr, end_blkaddr;
	bool need_update = true;

	f2fs_bug_on(F2FS_I_SB(dn->inode), blk_addr == NEW_ADDR);
	fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
							dn->ofs_in_node;

	/* Update the page address in the parent node */
	__set_data_blkaddr(dn, blk_addr);

	if (is_inode_flag_set(fi, FI_NO_EXTENT))
		return;

	write_lock(&fi->ext.ext_lock);

	start_fofs = fi->ext.fofs;
	end_fofs = fi->ext.fofs + fi->ext.len - 1;
	start_blkaddr = fi->ext.blk_addr;
	end_blkaddr = fi->ext.blk_addr + fi->ext.len - 1;

	/* Drop and initialize the matched extent */
	if (fi->ext.len == 1 && fofs == start_fofs)
		fi->ext.len = 0;

	/* Initial extent */
	if (fi->ext.len == 0) {
		if (blk_addr != NULL_ADDR) {
			fi->ext.fofs = fofs;
			fi->ext.blk_addr = blk_addr;
			fi->ext.len = 1;
		}
		goto end_update;
	}

	/* Front merge */
	if (fofs == start_fofs - 1 && blk_addr == start_blkaddr - 1) {
		fi->ext.fofs--;
		fi->ext.blk_addr--;
		fi->ext.len++;
		goto end_update;
	}

	/* Back merge */
	if (fofs == end_fofs + 1 && blk_addr == end_blkaddr + 1) {
		fi->ext.len++;
		goto end_update;
	}

	/* Split the existing extent, keeping the larger remaining part */
	if (fi->ext.len <= 1 || fofs < start_fofs || fofs > end_fofs) {
		/* the update lies outside the extent: nothing changed */
		need_update = false;
	} else if ((end_fofs - fofs) < (fi->ext.len >> 1)) {
		fi->ext.len = fofs - start_fofs;
	} else {
		fi->ext.fofs = fofs + 1;
		fi->ext.blk_addr = start_blkaddr +
				fofs - start_fofs + 1;
		fi->ext.len -= fofs - start_fofs + 1;
	}

	/* Finally, if the extent is very fragmented, let's drop the cache. */
	if (fi->ext.len < F2FS_MIN_EXTENT_LEN) {
		fi->ext.len = 0;
		set_inode_flag(fi, FI_NO_EXTENT);
		need_update = true;
	}
end_update:
	write_unlock(&fi->ext.ext_lock);
	if (need_update)
		sync_inode_page(dn);
}

397
/*
 * Return the page-cache page for @index of @inode with a reference held and
 * the page unlocked.
 *
 * If the page is not already cached and up to date, its block address is
 * looked up and a read is issued: READ_SYNC when @sync is true (the function
 * then waits for the page lock to be released and checks the result), READA
 * otherwise (the page may not be up to date yet on return).
 *
 * Errors are returned as ERR_PTR: -ENOENT for a hole, -EINVAL for a block
 * that is reserved (NEW_ADDR) but has no cached page, -ENOMEM, -EIO, or the
 * error from the node lookup / bio submission.
 */
struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync)
{
	struct address_space *mapping = inode->i_mapping;
	struct dnode_of_data dn;
	struct page *page;
	block_t blkaddr;
	int err;

	page = find_get_page(mapping, index);
	if (page && PageUptodate(page))
		return page;
	f2fs_put_page(page, 0);

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
	if (err)
		return ERR_PTR(err);
	f2fs_put_dnode(&dn);

	blkaddr = dn.data_blkaddr;
	if (blkaddr == NULL_ADDR)
		return ERR_PTR(-ENOENT);

	/* By fallocate(), there is no cached page, but with NEW_ADDR */
	if (unlikely(blkaddr == NEW_ADDR))
		return ERR_PTR(-EINVAL);

	page = grab_cache_page(mapping, index);
	if (!page)
		return ERR_PTR(-ENOMEM);

	if (PageUptodate(page)) {
		unlock_page(page);
		return page;
	}

	/* on failure f2fs_submit_page_bio() has already released the page */
	err = f2fs_submit_page_bio(F2FS_I_SB(inode), page, blkaddr,
					sync ? READ_SYNC : READA);
	if (err)
		return ERR_PTR(err);

	if (!sync)
		return page;

	wait_on_page_locked(page);
	if (unlikely(!PageUptodate(page))) {
		f2fs_put_page(page, 0);
		return ERR_PTR(-EIO);
	}
	return page;
}

J
Jaegeuk Kim 已提交
446
/*
 * If it tries to access a hole, return an error.
 * Because, the callers, functions in dir.c and GC, should be able to know
 * whether this page exists or not.
 *
 * On success the page for @index is returned locked, referenced and up to
 * date.  Failures are reported as ERR_PTR (-ENOMEM, -ENOENT for a hole, -EIO,
 * or the error from the node lookup / bio submission).
 */
struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
{
	struct address_space *mapping = inode->i_mapping;
	struct dnode_of_data dn;
	struct page *page;
	int err;

repeat:
	page = grab_cache_page(mapping, index);
	if (!page)
		return ERR_PTR(-ENOMEM);

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
	if (err) {
		f2fs_put_page(page, 1);
		return ERR_PTR(err);
	}
	f2fs_put_dnode(&dn);

	if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
		f2fs_put_page(page, 1);
		return ERR_PTR(-ENOENT);
	}

	if (PageUptodate(page))
		return page;

	/*
	 * A new dentry page is allocated but not able to be written, since its
	 * new inode page couldn't be allocated due to -ENOSPC.
	 * In such the case, its blkaddr can be remained as NEW_ADDR.
	 * see, f2fs_add_link -> get_new_data_page -> init_inode_metadata.
	 */
	if (dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
		SetPageUptodate(page);
		return page;
	}

	/* on failure f2fs_submit_page_bio() has already released the page */
	err = f2fs_submit_page_bio(F2FS_I_SB(inode), page,
					dn.data_blkaddr, READ_SYNC);
	if (err)
		return ERR_PTR(err);

	/* the read completion unlocks the page; take the lock back */
	lock_page(page);
	if (unlikely(!PageUptodate(page))) {
		f2fs_put_page(page, 1);
		return ERR_PTR(-EIO);
	}
	/* the page left this mapping while it was unlocked: start over */
	if (unlikely(page->mapping != mapping)) {
		f2fs_put_page(page, 1);
		goto repeat;
	}
	return page;
}

J
Jaegeuk Kim 已提交
508
/*
509 510
 * Caller ensures that this data page is never allocated.
 * A new zero-filled data page is allocated in the page cache.
511
 *
C
Chao Yu 已提交
512 513
 * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
 * f2fs_unlock_op().
514
 * Note that, ipage is set only by make_empty_dir.
515
 */
516
struct page *get_new_data_page(struct inode *inode,
517
		struct page *ipage, pgoff_t index, bool new_i_size)
518 519 520 521 522 523
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
	struct dnode_of_data dn;
	int err;

524
	set_new_dnode(&dn, inode, ipage, NULL, 0);
525
	err = f2fs_reserve_block(&dn, index);
526 527
	if (err)
		return ERR_PTR(err);
528
repeat:
529
	page = grab_cache_page(mapping, index);
530 531 532 533
	if (!page) {
		err = -ENOMEM;
		goto put_err;
	}
534 535 536 537 538 539

	if (PageUptodate(page))
		return page;

	if (dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
540
		SetPageUptodate(page);
541
	} else {
542 543
		err = f2fs_submit_page_bio(F2FS_I_SB(inode), page,
						dn.data_blkaddr, READ_SYNC);
544
		if (err)
545 546
			goto put_err;

547
		lock_page(page);
548
		if (unlikely(!PageUptodate(page))) {
549
			f2fs_put_page(page, 1);
550 551
			err = -EIO;
			goto put_err;
552
		}
553
		if (unlikely(page->mapping != mapping)) {
554 555
			f2fs_put_page(page, 1);
			goto repeat;
556 557 558 559 560 561
		}
	}

	if (new_i_size &&
		i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) {
		i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT));
562 563
		/* Only the directory inode sets new_i_size */
		set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR);
564 565
	}
	return page;
566 567 568 569

put_err:
	f2fs_put_dnode(&dn);
	return ERR_PTR(err);
570 571
}

572 573
static int __allocate_data_block(struct dnode_of_data *dn)
{
574
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603
	struct f2fs_summary sum;
	block_t new_blkaddr;
	struct node_info ni;
	int type;

	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
		return -EPERM;
	if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
		return -ENOSPC;

	__set_data_blkaddr(dn, NEW_ADDR);
	dn->data_blkaddr = NEW_ADDR;

	get_node_info(sbi, dn->nid, &ni);
	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);

	type = CURSEG_WARM_DATA;

	allocate_data_block(sbi, NULL, NULL_ADDR, &new_blkaddr, &sum, type);

	/* direct IO doesn't use extent cache to maximize the performance */
	set_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);
	update_extent_cache(new_blkaddr, dn);
	clear_inode_flag(F2FS_I(dn->inode), FI_NO_EXTENT);

	dn->data_blkaddr = new_blkaddr;
	return 0;
}

J
Jaegeuk Kim 已提交
604
/*
C
Chao Yu 已提交
605 606 607 608 609 610
 * get_data_block() now supported readahead/bmap/rw direct_IO with mapped bh.
 * If original data blocks are allocated, then give them to blockdev.
 * Otherwise,
 *     a. preallocate requested block addresses
 *     b. do not use extent cache for better performance
 *     c. give the block addresses to blockdev
611
 */
612 613
static int __get_data_block(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create, bool fiemap)
614 615 616 617
{
	unsigned int blkbits = inode->i_sb->s_blocksize_bits;
	unsigned maxblocks = bh_result->b_size >> blkbits;
	struct dnode_of_data dn;
618 619 620 621
	int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA;
	pgoff_t pgofs, end_offset;
	int err = 0, ofs = 1;
	bool allocated = false;
622 623 624 625

	/* Get the page offset from the block offset(iblock) */
	pgofs =	(pgoff_t)(iblock >> (PAGE_CACHE_SHIFT - blkbits));

626 627 628
	if (check_extent_cache(inode, pgofs, bh_result))
		goto out;

629
	if (create) {
630 631
		f2fs_balance_fs(F2FS_I_SB(inode));
		f2fs_lock_op(F2FS_I_SB(inode));
632
	}
633 634 635

	/* When reading holes, we need its node page */
	set_new_dnode(&dn, inode, NULL, NULL, 0);
636
	err = get_dnode_of_data(&dn, pgofs, mode);
637
	if (err) {
638 639 640
		if (err == -ENOENT)
			err = 0;
		goto unlock_out;
641
	}
642
	if (dn.data_blkaddr == NEW_ADDR && !fiemap)
643
		goto put_out;
644

645 646 647 648 649 650 651 652 653 654 655 656
	if (dn.data_blkaddr != NULL_ADDR) {
		map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
	} else if (create) {
		err = __allocate_data_block(&dn);
		if (err)
			goto put_out;
		allocated = true;
		map_bh(bh_result, inode->i_sb, dn.data_blkaddr);
	} else {
		goto put_out;
	}

657
	end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
658 659 660 661 662 663 664 665 666 667 668 669 670
	bh_result->b_size = (((size_t)1) << blkbits);
	dn.ofs_in_node++;
	pgofs++;

get_next:
	if (dn.ofs_in_node >= end_offset) {
		if (allocated)
			sync_inode_page(&dn);
		allocated = false;
		f2fs_put_dnode(&dn);

		set_new_dnode(&dn, inode, NULL, NULL, 0);
		err = get_dnode_of_data(&dn, pgofs, mode);
671
		if (err) {
672 673 674 675
			if (err == -ENOENT)
				err = 0;
			goto unlock_out;
		}
676
		if (dn.data_blkaddr == NEW_ADDR && !fiemap)
677 678
			goto put_out;

679
		end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
680
	}
681

682 683 684 685 686 687 688 689 690
	if (maxblocks > (bh_result->b_size >> blkbits)) {
		block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
		if (blkaddr == NULL_ADDR && create) {
			err = __allocate_data_block(&dn);
			if (err)
				goto sync_out;
			allocated = true;
			blkaddr = dn.data_blkaddr;
		}
A
arter97 已提交
691
		/* Give more consecutive addresses for the readahead */
692 693 694 695 696 697 698
		if (blkaddr == (bh_result->b_blocknr + ofs)) {
			ofs++;
			dn.ofs_in_node++;
			pgofs++;
			bh_result->b_size += (((size_t)1) << blkbits);
			goto get_next;
		}
699
	}
700 701 702 703
sync_out:
	if (allocated)
		sync_inode_page(&dn);
put_out:
704
	f2fs_put_dnode(&dn);
705 706
unlock_out:
	if (create)
707
		f2fs_unlock_op(F2FS_I_SB(inode));
708 709 710
out:
	trace_f2fs_get_data_block(inode, iblock, bh_result, err);
	return err;
711 712
}

713 714 715 716 717 718 719 720 721 722 723 724
/*
 * get_block callback used by the read, bmap and direct-IO paths in this
 * file: maps @iblock through __get_data_block() with fiemap handling off.
 */
static int get_data_block(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create)
{
	const bool for_fiemap = false;

	return __get_data_block(inode, iblock, bh_result, create, for_fiemap);
}

/*
 * get_block callback used by f2fs_fiemap(): maps @iblock through
 * __get_data_block() with fiemap handling on.
 */
static int get_data_block_fiemap(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create)
{
	const bool for_fiemap = true;

	return __get_data_block(inode, iblock, bh_result, create, for_fiemap);
}

J
Jaegeuk Kim 已提交
725 726 727
/*
 * Report the extent mapping of [@start, @start + @len) of @inode by handing
 * the request to generic_block_fiemap() with the fiemap get_block callback.
 */
int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		u64 start, u64 len)
{
	int ret;

	ret = generic_block_fiemap(inode, fieinfo, start, len,
						get_data_block_fiemap);
	return ret;
}

732 733
static int f2fs_read_data_page(struct file *file, struct page *page)
{
H
Huajun Li 已提交
734 735 736
	struct inode *inode = page->mapping->host;
	int ret;

737 738
	trace_f2fs_readpage(page, DATA);

A
arter97 已提交
739
	/* If the file has inline data, try to read it directly */
H
Huajun Li 已提交
740 741 742 743 744 745
	if (f2fs_has_inline_data(inode))
		ret = f2fs_read_inline_data(inode, page);
	else
		ret = mpage_readpage(page, get_data_block);

	return ret;
746 747 748 749 750 751
}

static int f2fs_read_data_pages(struct file *file,
			struct address_space *mapping,
			struct list_head *pages, unsigned nr_pages)
{
H
Huajun Li 已提交
752 753 754 755 756 757
	struct inode *inode = file->f_mapping->host;

	/* If the file has inline data, skip readpages */
	if (f2fs_has_inline_data(inode))
		return 0;

758
	return mpage_readpages(mapping, pages, nr_pages, get_data_block);
759 760
}

J
Jaegeuk Kim 已提交
761
int do_write_data_page(struct page *page, struct f2fs_io_info *fio)
762 763
{
	struct inode *inode = page->mapping->host;
J
Jaegeuk Kim 已提交
764
	block_t old_blkaddr, new_blkaddr;
765 766 767 768
	struct dnode_of_data dn;
	int err = 0;

	set_new_dnode(&dn, inode, NULL, NULL, 0);
769
	err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
770 771 772
	if (err)
		return err;

J
Jaegeuk Kim 已提交
773
	old_blkaddr = dn.data_blkaddr;
774 775

	/* This page is already truncated */
J
Jaegeuk Kim 已提交
776
	if (old_blkaddr == NULL_ADDR)
777 778 779 780 781 782 783 784
		goto out_writepage;

	set_page_writeback(page);

	/*
	 * If current allocation needs SSR,
	 * it had better in-place writes for updated data.
	 */
J
Jaegeuk Kim 已提交
785
	if (unlikely(old_blkaddr != NEW_ADDR &&
786 787
			!is_cold_data(page) &&
			need_inplace_update(inode))) {
J
Jaegeuk Kim 已提交
788
		rewrite_data_page(page, old_blkaddr, fio);
789
		set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
790
	} else {
J
Jaegeuk Kim 已提交
791 792
		write_data_page(page, &dn, &new_blkaddr, fio);
		update_extent_cache(new_blkaddr, &dn);
793
		set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
794 795 796 797 798 799 800 801 802 803
	}
out_writepage:
	f2fs_put_dnode(&dn);
	return err;
}

static int f2fs_write_data_page(struct page *page,
					struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
804
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
805 806 807
	loff_t i_size = i_size_read(inode);
	const pgoff_t end_index = ((unsigned long long) i_size)
							>> PAGE_CACHE_SHIFT;
H
Huajun Li 已提交
808
	unsigned offset = 0;
809
	bool need_balance_fs = false;
810
	int err = 0;
J
Jaegeuk Kim 已提交
811 812
	struct f2fs_io_info fio = {
		.type = DATA,
C
Chris Fries 已提交
813
		.rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
J
Jaegeuk Kim 已提交
814
	};
815

816 817
	trace_f2fs_writepage(page, DATA);

818
	if (page->index < end_index)
819
		goto write;
820 821 822 823 824 825

	/*
	 * If the offset is out-of-range of file size,
	 * this page does not have to be written to disk.
	 */
	offset = i_size & (PAGE_CACHE_SIZE - 1);
826
	if ((page->index >= end_index + 1) || !offset)
827
		goto out;
828 829

	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
830
write:
831
	if (unlikely(sbi->por_doing))
832 833
		goto redirty_out;

834
	/* Dentry blocks are controlled by checkpoint */
835
	if (S_ISDIR(inode->i_mode)) {
836 837
		if (unlikely(f2fs_cp_error(sbi)))
			goto redirty_out;
J
Jaegeuk Kim 已提交
838
		err = do_write_data_page(page, &fio);
839 840
		goto done;
	}
H
Huajun Li 已提交
841

842 843 844 845
	/* we should bypass data pages to proceed the kworkder jobs */
	if (unlikely(f2fs_cp_error(sbi))) {
		SetPageError(page);
		unlock_page(page);
846
		goto out;
847 848
	}

849
	if (!wbc->for_reclaim)
850
		need_balance_fs = true;
851
	else if (has_not_enough_free_secs(sbi, 0))
852
		goto redirty_out;
853

854 855 856 857 858 859 860 861 862
	f2fs_lock_op(sbi);
	if (f2fs_has_inline_data(inode) || f2fs_may_inline(inode))
		err = f2fs_write_inline_data(inode, page, offset);
	else
		err = do_write_data_page(page, &fio);
	f2fs_unlock_op(sbi);
done:
	if (err && err != -ENOENT)
		goto redirty_out;
863 864

	clear_cold_data(page);
865
out:
866
	inode_dec_dirty_pages(inode);
867
	unlock_page(page);
868
	if (need_balance_fs)
869
		f2fs_balance_fs(sbi);
870 871
	if (wbc->for_reclaim)
		f2fs_submit_merged_bio(sbi, DATA, WRITE);
872 873 874
	return 0;

redirty_out:
875
	redirty_page_for_writepage(wbc, page);
876
	return AOP_WRITEPAGE_ACTIVATE;
877 878
}

879 880 881 882 883 884 885 886 887
/*
 * write_cache_pages() callback: @data is the address_space being written.
 * Calls its ->writepage on @page and records the result on the mapping via
 * mapping_set_error() before passing it back.
 */
static int __f2fs_writepage(struct page *page, struct writeback_control *wbc,
			void *data)
{
	struct address_space *mapping = data;
	int ret;

	ret = mapping->a_ops->writepage(page, wbc);
	mapping_set_error(mapping, ret);

	return ret;
}

888
static int f2fs_write_data_pages(struct address_space *mapping,
889 890 891
			    struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
892
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
893
	bool locked = false;
894
	int ret;
895
	long diff;
896

897 898
	trace_f2fs_writepages(mapping->host, wbc, DATA);

P
P J P 已提交
899 900 901 902
	/* deal with chardevs and other special file */
	if (!mapping->a_ops->writepage)
		return 0;

903
	if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
904
			get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
905
			available_free_memory(sbi, DIRTY_DENTS))
906
		goto skip_write;
907

908
	diff = nr_pages_to_write(sbi, DATA, wbc);
909

910
	if (!S_ISDIR(inode->i_mode)) {
911
		mutex_lock(&sbi->writepages);
912 913
		locked = true;
	}
914
	ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
915
	if (locked)
916
		mutex_unlock(&sbi->writepages);
J
Jaegeuk Kim 已提交
917 918

	f2fs_submit_merged_bio(sbi, DATA, WRITE);
919 920 921

	remove_dirty_dir_inode(inode);

922
	wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
923
	return ret;
924 925

skip_write:
926
	wbc->pages_skipped += get_dirty_pages(inode);
927
	return 0;
928 929
}

930 931 932 933 934 935
/*
 * Undo the effects of a failed write that was meant to end at offset @to:
 * if that lies beyond i_size, drop the page cache and the blocks past
 * i_size.
 */
static void f2fs_write_failed(struct address_space *mapping, loff_t to)
{
	struct inode *inode = mapping->host;

	if (to <= inode->i_size)
		return;

	truncate_pagecache(inode, inode->i_size);
	truncate_blocks(inode, inode->i_size, true);
}

940 941 942 943 944
/*
 * ->write_begin: prepare the page covering [@pos, @pos + @len) for a
 * buffered write.
 *
 * On success 0 is returned and *@pagep holds the locked, referenced page.
 * Unless the whole page is going to be overwritten, the parts of the page
 * outside the write are made valid first: zeroed when the page lies at or
 * beyond i_size or its block is only reserved (NEW_ADDR), read from the
 * inline data area for inline inodes, or read from disk otherwise.
 *
 * On failure a negative errno is returned after f2fs_write_failed() has
 * trimmed anything instantiated beyond i_size.
 */
static int f2fs_write_begin(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned flags,
		struct page **pagep, void **fsdata)
{
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct page *page;
	pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
	struct dnode_of_data dn;
	bool use_inline;
	int err = 0;

	trace_f2fs_write_begin(inode, pos, len, flags);

	f2fs_balance_fs(sbi);
repeat:
	err = f2fs_convert_inline_data(inode, pos + len, NULL);
	if (err)
		goto fail;

	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page) {
		err = -ENOMEM;
		goto fail;
	}

	/* to avoid latency during memory pressure */
	unlock_page(page);

	*pagep = page;

	/*
	 * Remember whether this write stays inside the inline data area.  In
	 * that case no block is reserved and "dn" is never initialized, so
	 * the code below must not look at dn.data_blkaddr.
	 */
	use_inline = f2fs_has_inline_data(inode) &&
				(pos + len) <= MAX_INLINE_DATA;
	if (use_inline)
		goto inline_data;

	f2fs_lock_op(sbi);
	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_reserve_block(&dn, index);
	f2fs_unlock_op(sbi);
	if (err) {
		f2fs_put_page(page, 0);
		goto fail;
	}
inline_data:
	lock_page(page);
	if (unlikely(page->mapping != mapping)) {
		f2fs_put_page(page, 1);
		goto repeat;
	}

	f2fs_wait_on_page_writeback(page, DATA);

	if ((len == PAGE_CACHE_SIZE) || PageUptodate(page))
		return 0;

	if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
		unsigned start = pos & (PAGE_CACHE_SIZE - 1);
		unsigned end = start + len;

		/* Reading beyond i_size is simple: memset to zero */
		zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE);
		goto out;
	}

	if (!use_inline && dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
	} else {
		if (use_inline) {
			/*
			 * NOTE(review): the error path only drops the page
			 * reference and the page is locked again below, which
			 * presumes f2fs_read_inline_data() unlocks the page
			 * on both success and failure -- confirm in inline.c.
			 */
			err = f2fs_read_inline_data(inode, page);
			if (err) {
				page_cache_release(page);
				goto fail;
			}
		} else {
			/* on failure the page has already been released */
			err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr,
							READ_SYNC);
			if (err)
				goto fail;
		}

		lock_page(page);
		if (unlikely(!PageUptodate(page))) {
			f2fs_put_page(page, 1);
			err = -EIO;
			goto fail;
		}
		if (unlikely(page->mapping != mapping)) {
			f2fs_put_page(page, 1);
			goto repeat;
		}
	}
out:
	SetPageUptodate(page);
	clear_cold_data(page);
	return 0;
fail:
	f2fs_write_failed(mapping, pos + len);
	return err;
}

1038 1039 1040 1041 1042 1043 1044
/*
 * ->write_end: finish a buffered write of @copied bytes at @pos.
 *
 * The page is dirtied, i_size is extended (and the inode page updated) when
 * the write reached past the current end of file, and the page is unlocked
 * and released.  Returns @copied.
 */
static int f2fs_write_end(struct file *file,
			struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	struct inode *inode = page->mapping->host;
	loff_t write_end = pos + copied;

	trace_f2fs_write_end(inode, pos, len, copied);

	set_page_dirty(page);

	if (write_end > i_size_read(inode)) {
		i_size_write(inode, write_end);
		mark_inode_dirty(inode);
		update_inode_page(inode);
	}

	f2fs_put_page(page, 1);
	return copied;
}

1059
/*
 * Check whether a direct IO request is acceptable.
 *
 * Reads always pass.  A write passes only when both the file offset and the
 * user buffers described by @iter are aligned to the filesystem block size;
 * otherwise -EINVAL is returned.
 */
static int check_direct_IO(struct inode *inode, int rw,
		struct iov_iter *iter, loff_t offset)
{
	unsigned blocksize_mask = inode->i_sb->s_blocksize - 1;

	if (rw == READ)
		return 0;

	if ((offset & blocksize_mask) ||
			(iov_iter_alignment(iter) & blocksize_mask))
		return -EINVAL;

	return 0;
}

1076
/*
 * ->direct_IO: perform direct IO through blockdev_direct_IO() and
 * get_data_block().
 *
 * Returns 0 without doing any IO for inodes with inline data and for
 * requests rejected by check_direct_IO().  After a failed write, blocks
 * instantiated beyond i_size are trimmed again.
 */
static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb,
		struct iov_iter *iter, loff_t offset)
{
	struct file *file = iocb->ki_filp;
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	size_t count = iov_iter_count(iter);
	int err;

	/*
	 * Let buffer I/O handle the inline data case, and also any request
	 * that does not meet the direct IO alignment rules.
	 */
	if (f2fs_has_inline_data(inode) ||
			check_direct_IO(inode, rw, iter, offset))
		return 0;

	/* clear fsync mark to recover these blocks */
	fsync_mark_clear(F2FS_I_SB(inode), inode->i_ino);

	trace_f2fs_direct_IO_enter(inode, offset, count, rw);

	err = blockdev_direct_IO(rw, iocb, inode, iter, offset, get_data_block);
	if (err < 0 && (rw & WRITE))
		f2fs_write_failed(mapping, offset + count);

	trace_f2fs_direct_IO_exit(inode, offset, count, rw, err);

	return err;
}

1106 1107
static void f2fs_invalidate_data_page(struct page *page, unsigned int offset,
				      unsigned int length)
1108 1109
{
	struct inode *inode = page->mapping->host;
1110 1111 1112 1113

	if (offset % PAGE_CACHE_SIZE || length != PAGE_CACHE_SIZE)
		return;

1114
	if (PageDirty(page))
1115
		inode_dec_dirty_pages(inode);
1116 1117 1118 1119 1120 1121
	ClearPagePrivate(page);
}

/*
 * ->releasepage: clear the page's private flag and always report the page as
 * releasable (returns 1).  @wait is not used.
 */
static int f2fs_release_data_page(struct page *page, gfp_t wait)
{
	const int releasable = 1;

	ClearPagePrivate(page);
	return releasable;
}

static int f2fs_set_data_page_dirty(struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct inode *inode = mapping->host;

1130 1131
	trace_f2fs_set_page_dirty(page, DATA);

1132
	SetPageUptodate(page);
1133 1134
	mark_inode_dirty(inode);

1135 1136
	if (!PageDirty(page)) {
		__set_page_dirty_nobuffers(page);
1137
		update_dirty_page(inode, page);
1138 1139 1140 1141 1142
		return 1;
	}
	return 0;
}

J
Jaegeuk Kim 已提交
1143 1144
/*
 * ->bmap: translate file block @block into a device block through
 * generic_block_bmap().  Inodes with inline data report 0.
 */
static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
{
	struct inode *inode = mapping->host;
	sector_t blk = 0;

	if (!f2fs_has_inline_data(inode))
		blk = generic_block_bmap(mapping, block, get_data_block);

	return blk;
}

1153 1154 1155 1156 1157 1158
const struct address_space_operations f2fs_dblock_aops = {
	.readpage	= f2fs_read_data_page,
	.readpages	= f2fs_read_data_pages,
	.writepage	= f2fs_write_data_page,
	.writepages	= f2fs_write_data_pages,
	.write_begin	= f2fs_write_begin,
1159
	.write_end	= f2fs_write_end,
1160 1161 1162 1163
	.set_page_dirty	= f2fs_set_data_page_dirty,
	.invalidatepage	= f2fs_invalidate_data_page,
	.releasepage	= f2fs_release_data_page,
	.direct_IO	= f2fs_direct_IO,
J
Jaegeuk Kim 已提交
1164
	.bmap		= f2fs_bmap,
1165
};