data.c 47.2 KB
Newer Older
J
Jaegeuk Kim 已提交
1
/*
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
 * fs/f2fs/data.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
19
#include <linux/prefetch.h>
20
#include <linux/uio.h>
J
Jaegeuk Kim 已提交
21
#include <linux/cleancache.h>
22 23 24 25

#include "f2fs.h"
#include "node.h"
#include "segment.h"
J
Jaegeuk Kim 已提交
26
#include "trace.h"
27
#include <trace/events/f2fs.h>
28

29 30 31
static struct kmem_cache *extent_tree_slab;
static struct kmem_cache *extent_node_slab;

32 33
static void f2fs_read_end_io(struct bio *bio, int err)
{
34 35
	struct bio_vec *bvec;
	int i;
36

37 38 39 40 41 42 43 44 45
	if (f2fs_bio_encrypted(bio)) {
		if (err) {
			f2fs_release_crypto_ctx(bio->bi_private);
		} else {
			f2fs_end_io_crypto_work(bio->bi_private, bio);
			return;
		}
	}

46 47
	bio_for_each_segment_all(bvec, bio, i) {
		struct page *page = bvec->bv_page;
J
Jaegeuk Kim 已提交
48 49 50 51 52 53 54 55 56 57 58 59

		if (!err) {
			SetPageUptodate(page);
		} else {
			ClearPageUptodate(page);
			SetPageError(page);
		}
		unlock_page(page);
	}
	bio_put(bio);
}

60 61
static void f2fs_write_end_io(struct bio *bio, int err)
{
62
	struct f2fs_sb_info *sbi = bio->bi_private;
63 64
	struct bio_vec *bvec;
	int i;
65

66
	bio_for_each_segment_all(bvec, bio, i) {
67 68
		struct page *page = bvec->bv_page;

69 70
		f2fs_restore_and_release_control_page(&page);

71
		if (unlikely(err)) {
72
			set_page_dirty(page);
73
			set_bit(AS_EIO, &page->mapping->flags);
74
			f2fs_stop_checkpoint(sbi);
75 76 77
		}
		end_page_writeback(page);
		dec_page_count(sbi, F2FS_WRITEBACK);
78
	}
79 80 81 82 83 84 85 86

	if (!get_pages(sbi, F2FS_WRITEBACK) &&
			!list_empty(&sbi->cp_wait.task_list))
		wake_up(&sbi->cp_wait);

	bio_put(bio);
}

87 88 89 90 91 92 93 94 95 96 97 98
/*
 * Low-level block read/write IO operations.
 */
static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
				int npages, bool is_read)
{
	struct bio *bio;

	/* GFP_NOIO bio allocation does not fail */
	bio = bio_alloc(GFP_NOIO, npages);

	bio->bi_bdev = sbi->sb->s_bdev;
	bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
	if (is_read) {
		bio->bi_end_io = f2fs_read_end_io;
		bio->bi_private = NULL;
	} else {
		bio->bi_end_io = f2fs_write_end_io;
		bio->bi_private = sbi;
	}

	return bio;
}

J
Jaegeuk Kim 已提交
106
static void __submit_merged_bio(struct f2fs_bio_info *io)
107
{
J
Jaegeuk Kim 已提交
108
	struct f2fs_io_info *fio = &io->fio;
109 110 111 112

	if (!io->bio)
		return;

113
	if (is_read_io(fio->rw))
114
		trace_f2fs_submit_read_bio(io->sbi->sb, fio, io->bio);
115
	else
116
		trace_f2fs_submit_write_bio(io->sbi->sb, fio, io->bio);
117

118
	submit_bio(fio->rw, io->bio);
119 120 121 122
	io->bio = NULL;
}

void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
J
Jaegeuk Kim 已提交
123
				enum page_type type, int rw)
124 125 126 127 128 129
{
	enum page_type btype = PAGE_TYPE_OF_BIO(type);
	struct f2fs_bio_info *io;

	io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype];

130
	down_write(&io->io_rwsem);
J
Jaegeuk Kim 已提交
131 132 133 134

	/* change META to META_FLUSH in the checkpoint procedure */
	if (type >= META_FLUSH) {
		io->fio.type = META_FLUSH;
J
Jaegeuk Kim 已提交
135 136 137 138
		if (test_opt(sbi, NOBARRIER))
			io->fio.rw = WRITE_FLUSH | REQ_META | REQ_PRIO;
		else
			io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
J
Jaegeuk Kim 已提交
139 140
	}
	__submit_merged_bio(io);
141
	up_write(&io->io_rwsem);
142 143 144 145 146 147
}

/*
 * Fill the locked page with data located in the block address.
 * Return unlocked page.
 */
148
int f2fs_submit_page_bio(struct f2fs_io_info *fio)
149 150
{
	struct bio *bio;
151
	struct page *page = fio->encrypted_page ? fio->encrypted_page : fio->page;
152

153
	trace_f2fs_submit_page_bio(page, fio);
154
	f2fs_trace_ios(fio, 0);
155 156

	/* Allocate a new bio */
157
	bio = __bio_alloc(fio->sbi, fio->blk_addr, 1, is_read_io(fio->rw));
158 159 160 161 162 163 164

	if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
		bio_put(bio);
		f2fs_put_page(page, 1);
		return -EFAULT;
	}

165
	submit_bio(fio->rw, bio);
166 167 168
	return 0;
}

169
void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
170
{
171
	struct f2fs_sb_info *sbi = fio->sbi;
J
Jaegeuk Kim 已提交
172
	enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
173
	struct f2fs_bio_info *io;
174
	bool is_read = is_read_io(fio->rw);
175
	struct page *bio_page;
176

177
	io = is_read ? &sbi->read_io : &sbi->write_io[btype];
178

179
	verify_block_addr(sbi, fio->blk_addr);
180

181
	down_write(&io->io_rwsem);
182

183
	if (!is_read)
184 185
		inc_page_count(sbi, F2FS_WRITEBACK);

186
	if (io->bio && (io->last_block_in_bio != fio->blk_addr - 1 ||
J
Jaegeuk Kim 已提交
187 188
						io->fio.rw != fio->rw))
		__submit_merged_bio(io);
189 190
alloc_new:
	if (io->bio == NULL) {
J
Jaegeuk Kim 已提交
191
		int bio_blocks = MAX_BIO_BLOCKS(sbi);
192

193
		io->bio = __bio_alloc(sbi, fio->blk_addr, bio_blocks, is_read);
J
Jaegeuk Kim 已提交
194
		io->fio = *fio;
195 196
	}

197 198 199
	bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;

	if (bio_add_page(io->bio, bio_page, PAGE_CACHE_SIZE, 0) <
200
							PAGE_CACHE_SIZE) {
J
Jaegeuk Kim 已提交
201
		__submit_merged_bio(io);
202 203 204
		goto alloc_new;
	}

205
	io->last_block_in_bio = fio->blk_addr;
206
	f2fs_trace_ios(fio, 0);
207

208
	up_write(&io->io_rwsem);
209
	trace_f2fs_submit_page_mbio(fio->page, fio);
210 211
}

J
Jaegeuk Kim 已提交
212
/*
213 214 215 216 217
 * Lock ordering for the change of data block address:
 * ->data_page
 *  ->node_page
 *    update block addresses in the node page
 */
218
void set_data_blkaddr(struct dnode_of_data *dn)
219 220 221 222 223 224
{
	struct f2fs_node *rn;
	__le32 *addr_array;
	struct page *node_page = dn->node_page;
	unsigned int ofs_in_node = dn->ofs_in_node;

225
	f2fs_wait_on_page_writeback(node_page, NODE);
226

227
	rn = F2FS_NODE(node_page);
228 229 230

	/* Get physical address of data block */
	addr_array = blkaddr_in_node(rn);
J
Jaegeuk Kim 已提交
231
	addr_array[ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
232 233 234 235 236
	set_page_dirty(node_page);
}

int reserve_new_block(struct dnode_of_data *dn)
{
237
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
238

239
	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
240
		return -EPERM;
241
	if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
242 243
		return -ENOSPC;

244 245
	trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node);

246
	dn->data_blkaddr = NEW_ADDR;
247
	set_data_blkaddr(dn);
248
	mark_inode_dirty(dn->inode);
249 250 251 252
	sync_inode_page(dn);
	return 0;
}

253 254 255 256 257 258 259 260
/*
 * Reserve a block at @index, looking up (or allocating) the dnode first.
 * If the caller did not pass an inode page in @dn, the dnode reference
 * taken here is released before returning.
 */
int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
{
	/* idiom fix: `dn->inode_page ? false : true` was a redundant ternary */
	bool need_put = !dn->inode_page;
	int err;

	err = get_dnode_of_data(dn, index, ALLOC_NODE);
	if (err)
		return err;

	if (dn->data_blkaddr == NULL_ADDR)
		err = reserve_new_block(dn);
	if (err || need_put)
		f2fs_put_dnode(dn);
	return err;
}

269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294
/* Allocate an extent node for @ei and link it into @et at (@parent, @p). */
static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_info *ei,
				struct rb_node *parent, struct rb_node **p)
{
	struct extent_node *node;

	node = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC);
	if (!node)
		return NULL;

	node->ei = *ei;
	INIT_LIST_HEAD(&node->list);

	rb_link_node(&node->rb_node, parent, p);
	rb_insert_color(&node->rb_node, &et->root);
	et->count++;
	atomic_inc(&sbi->total_ext_node);
	return node;
}

static void __detach_extent_node(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_node *en)
{
	rb_erase(&en->rb_node, &et->root);
	et->count--;
	atomic_dec(&sbi->total_ext_node);
295 296 297

	if (et->cached_en == en)
		et->cached_en = NULL;
298 299
}

300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322
static struct extent_tree *__grab_extent_tree(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et;
	nid_t ino = inode->i_ino;

	down_write(&sbi->extent_tree_lock);
	et = radix_tree_lookup(&sbi->extent_tree_root, ino);
	if (!et) {
		et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS);
		f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et);
		memset(et, 0, sizeof(struct extent_tree));
		et->ino = ino;
		et->root = RB_ROOT;
		et->cached_en = NULL;
		rwlock_init(&et->lock);
		atomic_set(&et->refcount, 0);
		et->count = 0;
		sbi->total_ext_tree++;
	}
	atomic_inc(&et->refcount);
	up_write(&sbi->extent_tree_lock);

J
Jaegeuk Kim 已提交
323 324 325
	/* never died untill evict_inode */
	F2FS_I(inode)->extent_tree = et;

326 327 328
	return et;
}

329 330 331 332 333 334
static struct extent_node *__lookup_extent_tree(struct extent_tree *et,
							unsigned int fofs)
{
	struct rb_node *node = et->root.rb_node;
	struct extent_node *en;

335 336 337 338 339 340 341
	if (et->cached_en) {
		struct extent_info *cei = &et->cached_en->ei;

		if (cei->fofs <= fofs && cei->fofs + cei->len > fofs)
			return et->cached_en;
	}

342 343 344
	while (node) {
		en = rb_entry(node, struct extent_node, rb_node);

345
		if (fofs < en->ei.fofs)
346
			node = node->rb_left;
347
		else if (fofs >= en->ei.fofs + en->ei.len)
348
			node = node->rb_right;
349
		else
350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413
			return en;
	}
	return NULL;
}

/*
 * Try to merge @en with its in-tree predecessor. On success the
 * predecessor is detached and returned so the caller can free it.
 */
static struct extent_node *__try_back_merge(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_node *en)
{
	struct rb_node *node = rb_prev(&en->rb_node);
	struct extent_node *prev;

	if (!node)
		return NULL;

	prev = rb_entry(node, struct extent_node, rb_node);
	if (!__is_back_mergeable(&en->ei, &prev->ei))
		return NULL;

	/* Absorb the previous extent into @en. */
	en->ei.fofs = prev->ei.fofs;
	en->ei.blk = prev->ei.blk;
	en->ei.len += prev->ei.len;
	__detach_extent_node(sbi, et, prev);
	return prev;
}

/*
 * Try to merge @en with its in-tree successor. On success the
 * successor is detached and returned so the caller can free it.
 */
static struct extent_node *__try_front_merge(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_node *en)
{
	struct rb_node *node = rb_next(&en->rb_node);
	struct extent_node *next;

	if (!node)
		return NULL;

	next = rb_entry(node, struct extent_node, rb_node);
	if (!__is_front_mergeable(&en->ei, &next->ei))
		return NULL;

	/* Absorb the following extent into @en. */
	en->ei.len += next->ei.len;
	__detach_extent_node(sbi, et, next);
	return next;
}

/*
 * Insert @ei into @et, merging with an adjacent extent when possible.
 * A node made redundant by a double merge is returned through @den so
 * the caller can remove it from the global list and free it.
 */
static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
				struct extent_tree *et, struct extent_info *ei,
				struct extent_node **den)
{
	struct rb_node **p = &et->root.rb_node;
	struct rb_node *parent = NULL;
	struct extent_node *en;

	while (*p) {
		parent = *p;
		en = rb_entry(parent, struct extent_node, rb_node);

		if (ei->fofs < en->ei.fofs) {
			if (__is_front_mergeable(ei, &en->ei)) {
				/* Grow @en backwards to cover @ei. */
				f2fs_bug_on(sbi, !den);
				en->ei.fofs = ei->fofs;
				en->ei.blk = ei->blk;
				en->ei.len += ei->len;
				*den = __try_back_merge(sbi, et, en);
				goto update_out;
			}
			p = &(*p)->rb_left;
		} else if (ei->fofs >= en->ei.fofs + en->ei.len) {
			if (__is_back_mergeable(ei, &en->ei)) {
				/* Grow @en forwards to cover @ei. */
				f2fs_bug_on(sbi, !den);
				en->ei.len += ei->len;
				*den = __try_front_merge(sbi, et, en);
				goto update_out;
			}
			p = &(*p)->rb_right;
		} else {
			/* @ei overlaps an existing extent: caller bug. */
			f2fs_bug_on(sbi, 1);
		}
	}

	en = __attach_extent_node(sbi, et, ei, parent, p);
	if (!en)
		return NULL;
update_out:
	/* Track the largest extent and remember the touched node. */
	if (en->ei.len > et->largest.len)
		et->largest = en->ei;
	et->cached_en = en;
	return en;
}

/*
 * Free extent nodes of @et. With @free_all, every node goes; otherwise
 * only nodes already removed from the global LRU list are released.
 * Returns the number of nodes freed.
 */
static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
					struct extent_tree *et, bool free_all)
{
	struct rb_node *cur, *nxt;
	struct extent_node *en;
	unsigned int before = et->count;

	cur = rb_first(&et->root);
	while (cur) {
		nxt = rb_next(cur);
		en = rb_entry(cur, struct extent_node, rb_node);

		if (free_all) {
			/* Take the node off the global LRU first. */
			spin_lock(&sbi->extent_lock);
			if (!list_empty(&en->list))
				list_del_init(&en->list);
			spin_unlock(&sbi->extent_lock);
		}

		if (free_all || list_empty(&en->list)) {
			__detach_extent_node(sbi, et, en);
			kmem_cache_free(extent_node_slab, en);
		}
		cur = nxt;
	}

	return before - et->count;
}

J
Jaegeuk Kim 已提交
469 470 471 472 473 474 475 476 477
/* Invalidate the cached largest extent when @fofs falls inside it. */
static void __drop_largest_extent(struct inode *inode, pgoff_t fofs)
{
	struct extent_info *largest = &F2FS_I(inode)->extent_tree->largest;

	if (largest->fofs <= fofs && fofs < largest->fofs + largest->len)
		largest->len = 0;
}

void f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext)
478 479 480 481 482 483
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et;
	struct extent_node *en;
	struct extent_info ei;

J
Jaegeuk Kim 已提交
484
	if (!f2fs_may_extent_tree(inode))
485 486 487 488
		return;

	et = __grab_extent_tree(inode);

J
Jaegeuk Kim 已提交
489 490
	if (!i_ext || le32_to_cpu(i_ext->len) < F2FS_MIN_EXTENT_LEN)
		return;
491 492 493 494

	set_extent_info(&ei, le32_to_cpu(i_ext->fofs),
		le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len));

J
Jaegeuk Kim 已提交
495 496 497 498
	write_lock(&et->lock);
	if (et->count)
		goto out;

499 500 501 502 503 504 505 506 507 508
	en = __insert_extent_tree(sbi, et, &ei, NULL);
	if (en) {
		spin_lock(&sbi->extent_lock);
		list_add_tail(&en->list, &sbi->extent_list);
		spin_unlock(&sbi->extent_lock);
	}
out:
	write_unlock(&et->lock);
}

509 510 511 512
/*
 * Look up @pgofs in @inode's extent tree. On a hit, *ei receives the
 * covering extent and true is returned.
 */
static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
							struct extent_info *ei)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree;
	struct extent_node *en;
	bool hit = false;

	f2fs_bug_on(sbi, !et);

	trace_f2fs_lookup_extent_tree_start(inode, pgofs);

	read_lock(&et->lock);

	/* Check the cached largest extent before walking the tree. */
	if (et->largest.fofs <= pgofs &&
			et->largest.fofs + et->largest.len > pgofs) {
		*ei = et->largest;
		hit = true;
		stat_inc_read_hit(sbi->sb);
		goto out;
	}

	en = __lookup_extent_tree(et, pgofs);
	if (en) {
		*ei = en->ei;
		/* Refresh the node's position in the global LRU. */
		spin_lock(&sbi->extent_lock);
		if (!list_empty(&en->list))
			list_move_tail(&en->list, &sbi->extent_list);
		et->cached_en = en;
		spin_unlock(&sbi->extent_lock);
		hit = true;
		stat_inc_read_hit(sbi->sb);
	}
out:
	stat_inc_total_hit(sbi->sb);
	read_unlock(&et->lock);

	trace_f2fs_lookup_extent_tree_end(inode, pgofs, ei);
	return hit;
}

J
Jaegeuk Kim 已提交
550 551
/* return true, if on-disk extent should be updated */
static bool f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs,
552 553 554
							block_t blkaddr)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
J
Jaegeuk Kim 已提交
555
	struct extent_tree *et = F2FS_I(inode)->extent_tree;
556 557
	struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL;
	struct extent_node *den = NULL;
J
Jaegeuk Kim 已提交
558
	struct extent_info ei, dei, prev;
559 560
	unsigned int endofs;

J
Jaegeuk Kim 已提交
561 562
	if (!et)
		return false;
563

J
Jaegeuk Kim 已提交
564
	trace_f2fs_update_extent_tree(inode, fofs, blkaddr);
565 566 567

	write_lock(&et->lock);

J
Jaegeuk Kim 已提交
568 569 570 571 572 573 574 575 576 577 578
	if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) {
		write_unlock(&et->lock);
		return false;
	}

	prev = et->largest;
	dei.len = 0;

	/* we do not guarantee that the largest extent is cached all the time */
	__drop_largest_extent(inode, fofs);

579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599
	/* 1. lookup and remove existing extent info in cache */
	en = __lookup_extent_tree(et, fofs);
	if (!en)
		goto update_extent;

	dei = en->ei;
	__detach_extent_node(sbi, et, en);

	/* 2. if extent can be split more, split and insert the left part */
	if (dei.len > 1) {
		/*  insert left part of split extent into cache */
		if (fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN) {
			set_extent_info(&ei, dei.fofs, dei.blk,
							fofs - dei.fofs);
			en1 = __insert_extent_tree(sbi, et, &ei, NULL);
		}

		/* insert right part of split extent into cache */
		endofs = dei.fofs + dei.len - 1;
		if (endofs - fofs >= F2FS_MIN_EXTENT_LEN) {
			set_extent_info(&ei, fofs + 1,
600
				fofs - dei.fofs + dei.blk + 1, endofs - fofs);
601 602 603 604 605 606 607 608 609
			en2 = __insert_extent_tree(sbi, et, &ei, NULL);
		}
	}

update_extent:
	/* 3. update extent in extent cache */
	if (blkaddr) {
		set_extent_info(&ei, fofs, blkaddr, 1);
		en3 = __insert_extent_tree(sbi, et, &ei, &den);
J
Jaegeuk Kim 已提交
610 611 612 613 614 615 616 617

		/* give up extent_cache, if split and small updates happen */
		if (dei.len >= 1 &&
				prev.len < F2FS_MIN_EXTENT_LEN &&
				et->largest.len < F2FS_MIN_EXTENT_LEN) {
			et->largest.len = 0;
			set_inode_flag(F2FS_I(inode), FI_NO_EXTENT);
		}
618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648
	}

	/* 4. update in global extent list */
	spin_lock(&sbi->extent_lock);
	if (en && !list_empty(&en->list))
		list_del(&en->list);
	/*
	 * en1 and en2 split from en, they will become more and more smaller
	 * fragments after splitting several times. So if the length is smaller
	 * than F2FS_MIN_EXTENT_LEN, we will not add them into extent tree.
	 */
	if (en1)
		list_add_tail(&en1->list, &sbi->extent_list);
	if (en2)
		list_add_tail(&en2->list, &sbi->extent_list);
	if (en3) {
		if (list_empty(&en3->list))
			list_add_tail(&en3->list, &sbi->extent_list);
		else
			list_move_tail(&en3->list, &sbi->extent_list);
	}
	if (den && !list_empty(&den->list))
		list_del(&den->list);
	spin_unlock(&sbi->extent_lock);

	/* 5. release extent node */
	if (en)
		kmem_cache_free(extent_node_slab, en);
	if (den)
		kmem_cache_free(extent_node_slab, den);

J
Jaegeuk Kim 已提交
649 650
	if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
		__free_extent_tree(sbi, et, true);
651

J
Jaegeuk Kim 已提交
652
	write_unlock(&et->lock);
653

J
Jaegeuk Kim 已提交
654
	return !__is_extent_same(&prev, &et->largest);
655 656
}

J
Jaegeuk Kim 已提交
657
unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
658 659 660 661
{
	struct extent_tree *treevec[EXT_TREE_VEC_SIZE];
	struct extent_node *en, *tmp;
	unsigned long ino = F2FS_ROOT_INO(sbi);
J
Jaegeuk Kim 已提交
662
	struct radix_tree_root *root = &sbi->extent_tree_root;
663
	unsigned int found;
664
	unsigned int node_cnt = 0, tree_cnt = 0;
665

C
Chao Yu 已提交
666
	if (!test_opt(sbi, EXTENT_CACHE))
J
Jaegeuk Kim 已提交
667
		return 0;
668 669 670 671 672 673 674 675 676

	spin_lock(&sbi->extent_lock);
	list_for_each_entry_safe(en, tmp, &sbi->extent_list, list) {
		if (!nr_shrink--)
			break;
		list_del_init(&en->list);
	}
	spin_unlock(&sbi->extent_lock);

J
Jaegeuk Kim 已提交
677
	if (!down_write_trylock(&sbi->extent_tree_lock))
J
Jaegeuk Kim 已提交
678 679
		goto out;

J
Jaegeuk Kim 已提交
680
	while ((found = radix_tree_gang_lookup(root,
681 682 683 684 685 686 687 688
				(void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
		unsigned i;

		ino = treevec[found - 1]->ino + 1;
		for (i = 0; i < found; i++) {
			struct extent_tree *et = treevec[i];

			write_lock(&et->lock);
689
			node_cnt += __free_extent_tree(sbi, et, false);
690
			write_unlock(&et->lock);
J
Jaegeuk Kim 已提交
691 692 693 694 695 696
			if (!atomic_read(&et->refcount) && !et->count) {
				radix_tree_delete(root, et->ino);
				kmem_cache_free(extent_tree_slab, et);
				sbi->total_ext_tree--;
				tree_cnt++;
			}
697 698 699
		}
	}
	up_write(&sbi->extent_tree_lock);
J
Jaegeuk Kim 已提交
700
out:
701
	trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt);
J
Jaegeuk Kim 已提交
702 703

	return node_cnt + tree_cnt;
704 705
}

J
Jaegeuk Kim 已提交
706
unsigned int f2fs_destroy_extent_node(struct inode *inode)
707 708
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
J
Jaegeuk Kim 已提交
709
	struct extent_tree *et = F2FS_I(inode)->extent_tree;
710
	unsigned int node_cnt = 0;
711

712
	if (!et)
J
Jaegeuk Kim 已提交
713
		return 0;
714 715

	write_lock(&et->lock);
716
	node_cnt = __free_extent_tree(sbi, et, true);
717 718
	write_unlock(&et->lock);

J
Jaegeuk Kim 已提交
719 720
	return node_cnt;
}
721

J
Jaegeuk Kim 已提交
722 723 724 725 726 727 728 729 730 731 732 733
void f2fs_destroy_extent_tree(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree;
	unsigned int node_cnt = 0;

	if (!et)
		return;

	if (inode->i_nlink && !is_bad_inode(inode) && et->count) {
		atomic_dec(&et->refcount);
		return;
734
	}
J
Jaegeuk Kim 已提交
735 736 737 738 739 740 741

	/* free all extent info belong to this extent tree */
	node_cnt = f2fs_destroy_extent_node(inode);

	/* delete extent tree entry in radix tree */
	down_write(&sbi->extent_tree_lock);
	atomic_dec(&et->refcount);
742 743 744 745 746
	f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count);
	radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
	kmem_cache_free(extent_tree_slab, et);
	sbi->total_ext_tree--;
	up_write(&sbi->extent_tree_lock);
747

J
Jaegeuk Kim 已提交
748
	F2FS_I(inode)->extent_tree = NULL;
749

J
Jaegeuk Kim 已提交
750 751
	trace_f2fs_destroy_extent_tree(inode, node_cnt);
	return;
752 753
}

754 755 756
/* Extent-cache lookup wrapper; returns false when caching is disabled. */
static bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
							struct extent_info *ei)
{
	if (!f2fs_may_extent_tree(inode))
		return false;

	return f2fs_lookup_extent_tree(inode, pgofs, ei);
}

void f2fs_update_extent_cache(struct dnode_of_data *dn)
{
	struct f2fs_inode_info *fi = F2FS_I(dn->inode);
	pgoff_t fofs;

J
Jaegeuk Kim 已提交
768
	if (!f2fs_may_extent_tree(dn->inode))
769 770
		return;

J
Jaegeuk Kim 已提交
771 772
	f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR);

773 774 775
	fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
							dn->ofs_in_node;

J
Jaegeuk Kim 已提交
776
	if (f2fs_update_extent_tree(dn->inode, fofs, dn->data_blkaddr))
777
		sync_inode_page(dn);
778 779
}

780
/*
 * Grab the data page at @index and start a read for it. The returned
 * page is locked unless it was already up-to-date (then it is unlocked).
 */
struct page *get_read_data_page(struct inode *inode, pgoff_t index, int rw)
{
	struct address_space *mapping = inode->i_mapping;
	struct dnode_of_data dn;
	struct page *page;
	struct extent_info ei;
	int err;
	struct f2fs_io_info fio = {
		.sbi = F2FS_I_SB(inode),
		.type = DATA,
		.rw = rw,
		.encrypted_page = NULL,
	};

	/* Encrypted regular files go through the generic readpage path. */
	if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
		return read_mapping_page(mapping, index, NULL);

	page = grab_cache_page(mapping, index);
	if (!page)
		return ERR_PTR(-ENOMEM);

	/* Extent-cache hit avoids the dnode lookup entirely. */
	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
		dn.data_blkaddr = ei.blk + index - ei.fofs;
		goto got_it;
	}

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
	if (err) {
		f2fs_put_page(page, 1);
		return ERR_PTR(err);
	}
	f2fs_put_dnode(&dn);

	/* A hole: callers need to distinguish it from real data. */
	if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
		f2fs_put_page(page, 1);
		return ERR_PTR(-ENOENT);
	}
got_it:
	if (PageUptodate(page)) {
		unlock_page(page);
		return page;
	}

	/*
	 * A new dentry page is allocated but not able to be written, since its
	 * new inode page couldn't be allocated due to -ENOSPC.
	 * In such the case, its blkaddr can be remained as NEW_ADDR.
	 * see, f2fs_add_link -> get_new_data_page -> init_inode_metadata.
	 */
	if (dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
		SetPageUptodate(page);
		unlock_page(page);
		return page;
	}

	fio.blk_addr = dn.data_blkaddr;
	fio.page = page;
	err = f2fs_submit_page_bio(&fio);
	if (err)
		return ERR_PTR(err);
	return page;
}

/* Return an up-to-date, unlocked data page at @index, reading if needed. */
struct page *find_data_page(struct inode *inode, pgoff_t index)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;

	/* Reuse a cached, up-to-date page when one exists. */
	page = find_get_page(mapping, index);
	if (page && PageUptodate(page))
		return page;
	f2fs_put_page(page, 0);

	page = get_read_data_page(inode, index, READ_SYNC);
	if (IS_ERR(page))
		return page;

	if (PageUptodate(page))
		return page;

	/* The read was submitted; wait for the IO to complete. */
	wait_on_page_locked(page);
	if (unlikely(!PageUptodate(page))) {
		f2fs_put_page(page, 0);
		return ERR_PTR(-EIO);
	}
	return page;
}

/*
 * If it tries to access a hole, return an error.
 * Because, the callers, functions in dir.c and GC, should be able to know
 * whether this page exists or not.
 */
struct page *get_lock_data_page(struct inode *inode, pgoff_t index)
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
repeat:
	page = get_read_data_page(inode, index, READ_SYNC);
	if (IS_ERR(page))
		return page;

	/* wait for read completion */
	lock_page(page);
	if (unlikely(!PageUptodate(page))) {
		f2fs_put_page(page, 1);
		return ERR_PTR(-EIO);
	}
	/* The page may have been truncated while it was unlocked. */
	if (unlikely(page->mapping != mapping)) {
		f2fs_put_page(page, 1);
		goto repeat;
	}
	return page;
}

J
Jaegeuk Kim 已提交
897
/*
898 899
 * Caller ensures that this data page is never allocated.
 * A new zero-filled data page is allocated in the page cache.
900
 *
C
Chao Yu 已提交
901 902
 * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
 * f2fs_unlock_op().
903
 * Note that, ipage is set only by make_empty_dir.
904
 */
905
struct page *get_new_data_page(struct inode *inode,
906
		struct page *ipage, pgoff_t index, bool new_i_size)
907 908 909 910 911
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
	struct dnode_of_data dn;
	int err;
912 913 914 915
repeat:
	page = grab_cache_page(mapping, index);
	if (!page)
		return ERR_PTR(-ENOMEM);
916

917
	set_new_dnode(&dn, inode, ipage, NULL, 0);
918
	err = f2fs_reserve_block(&dn, index);
919 920
	if (err) {
		f2fs_put_page(page, 1);
921
		return ERR_PTR(err);
922
	}
923 924
	if (!ipage)
		f2fs_put_dnode(&dn);
925 926

	if (PageUptodate(page))
927
		goto got_it;
928 929 930

	if (dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
931
		SetPageUptodate(page);
932
	} else {
933
		f2fs_put_page(page, 1);
934

935 936
		page = get_read_data_page(inode, index, READ_SYNC);
		if (IS_ERR(page))
937
			goto repeat;
938 939 940

		/* wait for read completion */
		lock_page(page);
941
	}
942
got_it:
943 944 945
	if (new_i_size &&
		i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) {
		i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT));
946 947
		/* Only the directory inode sets new_i_size */
		set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR);
948 949 950 951
	}
	return page;
}

952 953
static int __allocate_data_block(struct dnode_of_data *dn)
{
954
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
955
	struct f2fs_inode_info *fi = F2FS_I(dn->inode);
956 957
	struct f2fs_summary sum;
	struct node_info ni;
958
	int seg = CURSEG_WARM_DATA;
959
	pgoff_t fofs;
960 961 962

	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
		return -EPERM;
963 964 965 966 967

	dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node);
	if (dn->data_blkaddr == NEW_ADDR)
		goto alloc;

968 969 970
	if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
		return -ENOSPC;

971
alloc:
972 973 974
	get_node_info(sbi, dn->nid, &ni);
	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);

975 976 977
	if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page)
		seg = CURSEG_DIRECT_IO;

978 979
	allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
								&sum, seg);
980
	set_data_blkaddr(dn);
981

982 983 984 985 986 987
	/* update i_size */
	fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
							dn->ofs_in_node;
	if (i_size_read(dn->inode) < ((fofs + 1) << PAGE_CACHE_SHIFT))
		i_size_write(dn->inode, ((fofs + 1) << PAGE_CACHE_SHIFT));

J
Jaegeuk Kim 已提交
988 989 990
	/* direct IO doesn't use extent cache to maximize the performance */
	__drop_largest_extent(dn->inode, fofs);

991 992 993
	return 0;
}

994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016
/*
 * Preallocate blocks for a direct-IO write covering [@offset,
 * @offset + @count). Best-effort: allocation errors simply stop the
 * preallocation; the write path handles the remainder.
 *
 * Fix: dropped the redundant trailing `return;` at the end of this
 * void function.
 */
static void __allocate_data_blocks(struct inode *inode, loff_t offset,
							size_t count)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct dnode_of_data dn;
	u64 start = F2FS_BYTES_TO_BLK(offset);
	u64 len = F2FS_BYTES_TO_BLK(count);
	bool allocated;
	u64 end_offset;

	while (len) {
		f2fs_balance_fs(sbi);
		f2fs_lock_op(sbi);

		/* When reading holes, we need its node page */
		set_new_dnode(&dn, inode, NULL, NULL, 0);
		if (get_dnode_of_data(&dn, start, ALLOC_NODE))
			goto out;

		allocated = false;
		end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));

		while (dn.ofs_in_node < end_offset && len) {
			block_t blkaddr;

			blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
			if (blkaddr == NULL_ADDR || blkaddr == NEW_ADDR) {
				if (__allocate_data_block(&dn))
					goto sync_out;
				allocated = true;
			}
			len--;
			start++;
			dn.ofs_in_node++;
		}

		if (allocated)
			sync_inode_page(&dn);

		f2fs_put_dnode(&dn);
		f2fs_unlock_op(sbi);
	}
	return;

sync_out:
	if (allocated)
		sync_inode_page(&dn);
	f2fs_put_dnode(&dn);
out:
	f2fs_unlock_op(sbi);
}

J
Jaegeuk Kim 已提交
1047
/*
J
Jaegeuk Kim 已提交
1048 1049
 * f2fs_map_blocks() now supported readahead/bmap/rw direct_IO with
 * f2fs_map_blocks structure.
C
Chao Yu 已提交
1050 1051 1052 1053 1054
 * If original data blocks are allocated, then give them to blockdev.
 * Otherwise,
 *     a. preallocate requested block addresses
 *     b. do not use extent cache for better performance
 *     c. give the block addresses to blockdev
1055
 */
J
Jaegeuk Kim 已提交
1056 1057
/*
 * Translate the logical block range described by @map into device
 * blocks.  With @create, missing blocks are allocated while
 * f2fs_lock_op() is held; with @fiemap, NEW_ADDR (reserved but
 * unwritten) blocks are reported rather than treated as holes.
 */
static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
			int create, bool fiemap)
{
	unsigned int maxblocks = map->m_len;
	struct dnode_of_data dn;
	int dn_mode = create ? ALLOC_NODE : LOOKUP_NODE_RA;
	pgoff_t pgofs, end_offset;
	int ret = 0, run_len = 1;
	struct extent_info ei;
	bool did_alloc = false;

	map->m_len = 0;
	map->m_flags = 0;

	/* block size == page size, so one logical block per page index */
	pgofs = (pgoff_t)map->m_lblk;

	/* fast path: the starting block is covered by the extent cache */
	if (f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
		map->m_pblk = ei.blk + pgofs - ei.fofs;
		map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
		map->m_flags = F2FS_MAP_MAPPED;
		goto out;
	}

	if (create)
		f2fs_lock_op(F2FS_I_SB(inode));

	/* reading holes still needs the node page */
	set_new_dnode(&dn, inode, NULL, NULL, 0);
	ret = get_dnode_of_data(&dn, pgofs, dn_mode);
	if (ret) {
		if (ret == -ENOENT)
			ret = 0;	/* a hole, not an error */
		goto unlock_out;
	}
	if (dn.data_blkaddr == NEW_ADDR && !fiemap)
		goto put_out;

	if (dn.data_blkaddr != NULL_ADDR) {
		map->m_flags = F2FS_MAP_MAPPED;
		map->m_pblk = dn.data_blkaddr;
		if (dn.data_blkaddr == NEW_ADDR)
			map->m_flags |= F2FS_MAP_UNWRITTEN;
	} else if (create) {
		ret = __allocate_data_block(&dn);
		if (ret)
			goto put_out;
		did_alloc = true;
		map->m_flags = F2FS_MAP_NEW | F2FS_MAP_MAPPED;
		map->m_pblk = dn.data_blkaddr;
	} else {
		goto put_out;
	}

	end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
	map->m_len = 1;
	dn.ofs_in_node++;
	pgofs++;

get_next:
	/* crossed into the next node page: reload the dnode */
	if (dn.ofs_in_node >= end_offset) {
		if (did_alloc)
			sync_inode_page(&dn);
		did_alloc = false;
		f2fs_put_dnode(&dn);

		set_new_dnode(&dn, inode, NULL, NULL, 0);
		ret = get_dnode_of_data(&dn, pgofs, dn_mode);
		if (ret) {
			if (ret == -ENOENT)
				ret = 0;
			goto unlock_out;
		}
		if (dn.data_blkaddr == NEW_ADDR && !fiemap)
			goto put_out;

		end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
	}

	if (maxblocks > map->m_len) {
		block_t blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);

		if (blkaddr == NULL_ADDR && create) {
			ret = __allocate_data_block(&dn);
			if (ret)
				goto sync_out;
			did_alloc = true;
			map->m_flags |= F2FS_MAP_NEW;
			blkaddr = dn.data_blkaddr;
		}
		/* extend the run only while device addresses stay contiguous */
		if ((map->m_pblk != NEW_ADDR &&
				blkaddr == (map->m_pblk + run_len)) ||
				(map->m_pblk == NEW_ADDR &&
				blkaddr == NEW_ADDR)) {
			run_len++;
			dn.ofs_in_node++;
			pgofs++;
			map->m_len++;
			goto get_next;
		}
	}
sync_out:
	if (did_alloc)
		sync_inode_page(&dn);
put_out:
	f2fs_put_dnode(&dn);
unlock_out:
	if (create)
		f2fs_unlock_op(F2FS_I_SB(inode));
out:
	trace_f2fs_map_blocks(inode, map, ret);
	return ret;
}

J
Jaegeuk Kim 已提交
1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187
/*
 * Adapt f2fs_map_blocks() to the buffer_head-based get_block_t
 * interface used by generic block helpers.
 */
static int __get_data_block(struct inode *inode, sector_t iblock,
			struct buffer_head *bh, int create, bool fiemap)
{
	struct f2fs_map_blocks map = {
		.m_lblk = iblock,
		.m_len = bh->b_size >> inode->i_blkbits,
	};
	int err = f2fs_map_blocks(inode, &map, create, fiemap);

	if (err)
		return err;

	map_bh(bh, inode->i_sb, map.m_pblk);
	bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
	bh->b_size = map.m_len << inode->i_blkbits;
	return 0;
}

1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199
/* get_block_t callback for bmap/direct-IO: NEW_ADDR blocks are treated as holes. */
static int get_data_block(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create)
{
	return __get_data_block(inode, iblock, bh_result, create, false);
}

/* get_block_t callback for fiemap: reserved-but-unwritten blocks are reported too. */
static int get_data_block_fiemap(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create)
{
	return __get_data_block(inode, iblock, bh_result, create, true);
}

1200 1201 1202 1203 1204 1205 1206 1207 1208 1209
/* Convert a byte offset into a block number using the inode's block size. */
static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
{
	return (offset >> inode->i_blkbits);
}

/* Convert a block number back into a byte offset. */
static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
{
	return (blk << inode->i_blkbits);
}

J
Jaegeuk Kim 已提交
1210 1211 1212
/*
 * FIEMAP ioctl backend: walk the file block by block via
 * get_data_block_fiemap(), coalescing contiguous mapped blocks into
 * extents and emitting each one when a discontinuity or hole is hit.
 */
int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		u64 start, u64 len)
{
	struct buffer_head map_bh;
	sector_t start_blk, last_blk;
	loff_t isize = i_size_read(inode);
	u64 ext_logical = 0, ext_phys = 0, ext_len = 0;
	u32 ext_flags = 0;
	bool past_eof = false, whole_file = false;
	int ret = 0;

	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
	if (ret)
		return ret;

	mutex_lock(&inode->i_mutex);

	/* clamp the request to the file size */
	if (len >= isize) {
		whole_file = true;
		len = isize;
	}

	/* probe at least one block */
	if (logical_to_blk(inode, len) == 0)
		len = blk_to_logical(inode, 1);

	start_blk = logical_to_blk(inode, start);
	last_blk = logical_to_blk(inode, start + len - 1);
next:
	memset(&map_bh, 0, sizeof(struct buffer_head));
	map_bh.b_size = len;

	ret = get_data_block_fiemap(inode, start_blk, &map_bh, 0);
	if (ret)
		goto out;

	if (!buffer_mapped(&map_bh)) {
		/* a hole: flush the accumulated extent, if any */
		start_blk++;

		if (!past_eof && blk_to_logical(inode, start_blk) >= isize)
			past_eof = true;

		if (past_eof && ext_len) {
			ext_flags |= FIEMAP_EXTENT_LAST;
			ret = fiemap_fill_next_extent(fieinfo, ext_logical,
					ext_phys, ext_len, ext_flags);
		} else if (ext_len) {
			ret = fiemap_fill_next_extent(fieinfo, ext_logical,
					ext_phys, ext_len, ext_flags);
			ext_len = 0;
		}

		/* done once holes reach/pass EOF or the requested range */
		if (start_blk > last_blk || past_eof || ret)
			goto out;
	} else {
		if (start_blk > last_blk && !whole_file) {
			ret = fiemap_fill_next_extent(fieinfo, ext_logical,
					ext_phys, ext_len, ext_flags);
			goto out;
		}

		/* emit the previously accumulated extent before starting anew */
		if (ext_len) {
			ret = fiemap_fill_next_extent(fieinfo, ext_logical,
					ext_phys, ext_len, ext_flags);
			if (ret)
				goto out;
		}

		ext_logical = blk_to_logical(inode, start_blk);
		ext_phys = blk_to_logical(inode, map_bh.b_blocknr);
		ext_len = map_bh.b_size;
		ext_flags = 0;
		if (buffer_unwritten(&map_bh))
			ext_flags = FIEMAP_EXTENT_UNWRITTEN;

		start_blk += logical_to_blk(inode, ext_len);

		/*
		 * Once past EOF, the next hole must mark the pending
		 * extent with FIEMAP_EXTENT_LAST.
		 */
		if (!past_eof && ext_logical + ext_len >= isize)
			past_eof = true;
	}
	cond_resched();
	if (fatal_signal_pending(current))
		ret = -EINTR;
	else
		goto next;
out:
	if (ret == 1)
		ret = 0;	/* fiemap_fill_next_extent() said "stop" */

	mutex_unlock(&inode->i_mutex);
	return ret;
}

J
Jaegeuk Kim 已提交
1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403
/*
 * This function was originally taken from fs/mpage.c, and customized for f2fs.
 * Major change was from block_size == page_size in f2fs by default.
 *
 * Reads either the single @page (->readpage path, @pages == NULL) or up to
 * @nr_pages pages from @pages (->readpages path), batching contiguous
 * blocks into one bio and reusing f2fs_map_blocks() results across pages.
 */
static int f2fs_mpage_readpages(struct address_space *mapping,
			struct list_head *pages, struct page *page,
			unsigned nr_pages)
{
	struct bio *bio = NULL;
	unsigned page_idx;
	sector_t last_block_in_bio = 0;
	struct inode *inode = mapping->host;
	const unsigned blkbits = inode->i_blkbits;
	const unsigned blocksize = 1 << blkbits;
	sector_t block_in_file;
	sector_t last_block;
	sector_t last_block_in_file;
	sector_t block_nr;
	struct block_device *bdev = inode->i_sb->s_bdev;
	struct f2fs_map_blocks map;

	map.m_pblk = 0;
	map.m_lblk = 0;
	map.m_len = 0;
	map.m_flags = 0;

	for (page_idx = 0; nr_pages; page_idx++, nr_pages--) {
		if (pages) {
			page = list_entry(pages->prev, struct page, lru);
			list_del(&page->lru);
			if (add_to_page_cache_lru(page, mapping,
						  page->index, GFP_KERNEL))
				goto next_page;
		}

		/*
		 * Fix: prefetch only after @page is selected.  Previously
		 * this ran before the list_entry() above, so the first
		 * ->readpages iteration computed &page->flags through a
		 * NULL page pointer.
		 */
		prefetchw(&page->flags);

		block_in_file = (sector_t)page->index;
		last_block = block_in_file + nr_pages;
		last_block_in_file = (i_size_read(inode) + blocksize - 1) >>
								blkbits;
		if (last_block > last_block_in_file)
			last_block = last_block_in_file;

		/*
		 * Map blocks using the previous result first.
		 */
		if ((map.m_flags & F2FS_MAP_MAPPED) &&
				block_in_file > map.m_lblk &&
				block_in_file < (map.m_lblk + map.m_len))
			goto got_it;

		/*
		 * Then do more f2fs_map_blocks() calls until we are
		 * done with this page.
		 */
		map.m_flags = 0;

		if (block_in_file < last_block) {
			map.m_lblk = block_in_file;
			map.m_len = last_block - block_in_file;

			if (f2fs_map_blocks(inode, &map, 0, false))
				goto set_error_page;
		}
got_it:
		if ((map.m_flags & F2FS_MAP_MAPPED)) {
			block_nr = map.m_pblk + block_in_file - map.m_lblk;
			SetPageMappedToDisk(page);

			/* cleancache_get_page() == 0 means it filled the page */
			if (!PageUptodate(page) && !cleancache_get_page(page)) {
				SetPageUptodate(page);
				goto confused;
			}
		} else {
			/* hole: deliver a zeroed page without any I/O */
			zero_user_segment(page, 0, PAGE_CACHE_SIZE);
			SetPageUptodate(page);
			unlock_page(page);
			goto next_page;
		}

		/*
		 * This page will go to BIO.  Do we need to send this
		 * BIO off first?
		 */
		if (bio && (last_block_in_bio != block_nr - 1)) {
submit_and_realloc:
			submit_bio(READ, bio);
			bio = NULL;
		}
		if (bio == NULL) {
			struct f2fs_crypto_ctx *ctx = NULL;

			if (f2fs_encrypted_inode(inode) &&
					S_ISREG(inode->i_mode)) {
				struct page *cpage;

				ctx = f2fs_get_crypto_ctx(inode);
				if (IS_ERR(ctx))
					goto set_error_page;

				/* wait the page to be moved by cleaning */
				cpage = find_lock_page(
						META_MAPPING(F2FS_I_SB(inode)),
						block_nr);
				if (cpage) {
					f2fs_wait_on_page_writeback(cpage,
									DATA);
					f2fs_put_page(cpage, 1);
				}
			}

			bio = bio_alloc(GFP_KERNEL,
				min_t(int, nr_pages, bio_get_nr_vecs(bdev)));
			if (!bio) {
				if (ctx)
					f2fs_release_crypto_ctx(ctx);
				goto set_error_page;
			}
			bio->bi_bdev = bdev;
			bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(block_nr);
			bio->bi_end_io = f2fs_read_end_io;
			bio->bi_private = ctx;
		}

		if (bio_add_page(bio, page, blocksize, 0) < blocksize)
			goto submit_and_realloc;

		last_block_in_bio = block_nr;
		goto next_page;
set_error_page:
		SetPageError(page);
		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
		unlock_page(page);
		goto next_page;
confused:
		if (bio) {
			submit_bio(READ, bio);
			bio = NULL;
		}
		unlock_page(page);
next_page:
		if (pages)
			page_cache_release(page);
	}
	BUG_ON(pages && !list_empty(pages));
	if (bio)
		submit_bio(READ, bio);
	return 0;
}

1464 1465
static int f2fs_read_data_page(struct file *file, struct page *page)
{
H
Huajun Li 已提交
1466
	struct inode *inode = page->mapping->host;
1467
	int ret = -EAGAIN;
H
Huajun Li 已提交
1468

1469 1470
	trace_f2fs_readpage(page, DATA);

A
arter97 已提交
1471
	/* If the file has inline data, try to read it directly */
H
Huajun Li 已提交
1472 1473
	if (f2fs_has_inline_data(inode))
		ret = f2fs_read_inline_data(inode, page);
1474
	if (ret == -EAGAIN)
J
Jaegeuk Kim 已提交
1475
		ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1);
H
Huajun Li 已提交
1476
	return ret;
1477 1478 1479 1480 1481 1482
}

static int f2fs_read_data_pages(struct file *file,
			struct address_space *mapping,
			struct list_head *pages, unsigned nr_pages)
{
H
Huajun Li 已提交
1483 1484 1485 1486 1487 1488
	struct inode *inode = file->f_mapping->host;

	/* If the file has inline data, skip readpages */
	if (f2fs_has_inline_data(inode))
		return 0;

J
Jaegeuk Kim 已提交
1489
	return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages);
1490 1491
}

1492
int do_write_data_page(struct f2fs_io_info *fio)
1493
{
1494
	struct page *page = fio->page;
1495 1496 1497 1498 1499
	struct inode *inode = page->mapping->host;
	struct dnode_of_data dn;
	int err = 0;

	set_new_dnode(&dn, inode, NULL, NULL, 0);
1500
	err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
1501 1502 1503
	if (err)
		return err;

1504
	fio->blk_addr = dn.data_blkaddr;
1505 1506

	/* This page is already truncated */
1507 1508
	if (fio->blk_addr == NULL_ADDR) {
		ClearPageUptodate(page);
1509
		goto out_writepage;
1510
	}
1511

1512 1513 1514 1515 1516 1517 1518 1519
	if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
		fio->encrypted_page = f2fs_encrypt(inode, fio->page);
		if (IS_ERR(fio->encrypted_page)) {
			err = PTR_ERR(fio->encrypted_page);
			goto out_writepage;
		}
	}

1520 1521 1522 1523 1524 1525
	set_page_writeback(page);

	/*
	 * If current allocation needs SSR,
	 * it had better in-place writes for updated data.
	 */
1526
	if (unlikely(fio->blk_addr != NEW_ADDR &&
1527 1528
			!is_cold_data(page) &&
			need_inplace_update(inode))) {
1529
		rewrite_data_page(fio);
1530
		set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
1531
		trace_f2fs_do_write_data_page(page, IPU);
1532
	} else {
1533
		write_data_page(&dn, fio);
1534
		set_data_blkaddr(&dn);
1535
		f2fs_update_extent_cache(&dn);
1536
		trace_f2fs_do_write_data_page(page, OPU);
1537
		set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
1538 1539
		if (page->index == 0)
			set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN);
1540 1541 1542 1543 1544 1545 1546 1547 1548 1549
	}
out_writepage:
	f2fs_put_dnode(&dn);
	return err;
}

static int f2fs_write_data_page(struct page *page,
					struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
1550
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1551 1552 1553
	loff_t i_size = i_size_read(inode);
	const pgoff_t end_index = ((unsigned long long) i_size)
							>> PAGE_CACHE_SHIFT;
H
Huajun Li 已提交
1554
	unsigned offset = 0;
1555
	bool need_balance_fs = false;
1556
	int err = 0;
J
Jaegeuk Kim 已提交
1557
	struct f2fs_io_info fio = {
1558
		.sbi = sbi,
J
Jaegeuk Kim 已提交
1559
		.type = DATA,
C
Chris Fries 已提交
1560
		.rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
1561
		.page = page,
1562
		.encrypted_page = NULL,
J
Jaegeuk Kim 已提交
1563
	};
1564

1565 1566
	trace_f2fs_writepage(page, DATA);

1567
	if (page->index < end_index)
1568
		goto write;
1569 1570 1571 1572 1573 1574

	/*
	 * If the offset is out-of-range of file size,
	 * this page does not have to be written to disk.
	 */
	offset = i_size & (PAGE_CACHE_SIZE - 1);
1575
	if ((page->index >= end_index + 1) || !offset)
1576
		goto out;
1577 1578

	zero_user_segment(page, offset, PAGE_CACHE_SIZE);
1579
write:
1580
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
1581
		goto redirty_out;
1582 1583 1584 1585 1586
	if (f2fs_is_drop_cache(inode))
		goto out;
	if (f2fs_is_volatile_file(inode) && !wbc->for_reclaim &&
			available_free_memory(sbi, BASE_CHECK))
		goto redirty_out;
1587

1588
	/* Dentry blocks are controlled by checkpoint */
1589
	if (S_ISDIR(inode->i_mode)) {
1590 1591
		if (unlikely(f2fs_cp_error(sbi)))
			goto redirty_out;
1592
		err = do_write_data_page(&fio);
1593 1594
		goto done;
	}
H
Huajun Li 已提交
1595

1596 1597 1598
	/* we should bypass data pages to proceed the kworkder jobs */
	if (unlikely(f2fs_cp_error(sbi))) {
		SetPageError(page);
1599
		goto out;
1600 1601
	}

1602
	if (!wbc->for_reclaim)
1603
		need_balance_fs = true;
1604
	else if (has_not_enough_free_secs(sbi, 0))
1605
		goto redirty_out;
1606

1607
	err = -EAGAIN;
1608
	f2fs_lock_op(sbi);
1609 1610 1611
	if (f2fs_has_inline_data(inode))
		err = f2fs_write_inline_data(inode, page);
	if (err == -EAGAIN)
1612
		err = do_write_data_page(&fio);
1613 1614 1615 1616
	f2fs_unlock_op(sbi);
done:
	if (err && err != -ENOENT)
		goto redirty_out;
1617 1618

	clear_cold_data(page);
1619
out:
1620
	inode_dec_dirty_pages(inode);
1621 1622
	if (err)
		ClearPageUptodate(page);
1623
	unlock_page(page);
1624
	if (need_balance_fs)
1625
		f2fs_balance_fs(sbi);
1626 1627
	if (wbc->for_reclaim)
		f2fs_submit_merged_bio(sbi, DATA, WRITE);
1628 1629 1630
	return 0;

redirty_out:
1631
	redirty_page_for_writepage(wbc, page);
1632
	return AOP_WRITEPAGE_ACTIVATE;
1633 1634
}

1635 1636 1637 1638 1639 1640 1641 1642 1643
/*
 * write_cache_pages() callback: forward to ->writepage and record any
 * error on the mapping so a later fsync can observe it.
 */
static int __f2fs_writepage(struct page *page, struct writeback_control *wbc,
			void *data)
{
	struct address_space *mapping = data;
	int ret = mapping->a_ops->writepage(page, wbc);
	mapping_set_error(mapping, ret);
	return ret;
}

1644
static int f2fs_write_data_pages(struct address_space *mapping,
1645 1646 1647
			    struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
1648
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1649
	bool locked = false;
1650
	int ret;
1651
	long diff;
1652

1653 1654
	trace_f2fs_writepages(mapping->host, wbc, DATA);

P
P J P 已提交
1655 1656 1657 1658
	/* deal with chardevs and other special file */
	if (!mapping->a_ops->writepage)
		return 0;

1659
	if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
1660
			get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
1661
			available_free_memory(sbi, DIRTY_DENTS))
1662
		goto skip_write;
1663

1664 1665 1666 1667
	/* during POR, we don't need to trigger writepage at all. */
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto skip_write;

1668
	diff = nr_pages_to_write(sbi, DATA, wbc);
1669

1670 1671 1672 1673
	if (!S_ISDIR(inode->i_mode)) {
		mutex_lock(&sbi->writepages);
		locked = true;
	}
1674
	ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
1675 1676
	if (locked)
		mutex_unlock(&sbi->writepages);
J
Jaegeuk Kim 已提交
1677 1678

	f2fs_submit_merged_bio(sbi, DATA, WRITE);
1679 1680 1681

	remove_dirty_dir_inode(inode);

1682
	wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
1683
	return ret;
1684 1685

skip_write:
1686
	wbc->pages_skipped += get_dirty_pages(inode);
1687
	return 0;
1688 1689
}

1690 1691 1692 1693 1694 1695
/*
 * Undo the effects of a failed extending write: drop page cache and
 * preallocated blocks beyond the current i_size.
 */
static void f2fs_write_failed(struct address_space *mapping, loff_t to)
{
	struct inode *inode = mapping->host;

	if (to <= inode->i_size)
		return;

	truncate_pagecache(inode, inode->i_size);
	truncate_blocks(inode, inode->i_size, true);
}

1700 1701 1702 1703 1704
/*
 * ->write_begin: prepare one page for a buffered write — convert
 * inline data if needed, reserve the block, and bring the page
 * up to date (by zeroing or by reading it in) for partial writes.
 */
static int f2fs_write_begin(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned flags,
		struct page **pagep, void **fsdata)
{
	struct inode *inode = mapping->host;
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct page *page, *ipage;
	pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
	struct dnode_of_data dn;
	int ret = 0;

	trace_f2fs_write_begin(inode, pos, len, flags);

	f2fs_balance_fs(sbi);

	/*
	 * Convert inline data up front for any index but 0: the lock
	 * order for inline conversion must be page #0 -> inode page,
	 * and only index 0 grabs page #0 first below.
	 */
	if (index != 0) {
		ret = f2fs_convert_inline_inode(inode);
		if (ret)
			goto fail;
	}
repeat:
	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page) {
		ret = -ENOMEM;
		goto fail;
	}

	*pagep = page;

	f2fs_lock_op(sbi);

	/* the inode page decides whether the file is still inline */
	ipage = get_node_page(sbi, inode->i_ino);
	if (IS_ERR(ipage)) {
		ret = PTR_ERR(ipage);
		goto unlock_fail;
	}

	set_new_dnode(&dn, inode, ipage, ipage, 0);

	if (f2fs_has_inline_data(inode)) {
		if (pos + len <= MAX_INLINE_DATA) {
			/* the write still fits inline: mirror it into the page */
			read_inline_data(page, ipage);
			set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
			sync_inode_page(&dn);
			goto put_next;
		}
		ret = f2fs_convert_inline_page(&dn, page);
		if (ret)
			goto put_fail;
	}
	ret = f2fs_reserve_block(&dn, index);
	if (ret)
		goto put_fail;
put_next:
	f2fs_put_dnode(&dn);
	f2fs_unlock_op(sbi);

	/* full-page overwrite or already up to date: nothing to read */
	if ((len == PAGE_CACHE_SIZE) || PageUptodate(page))
		return 0;

	f2fs_wait_on_page_writeback(page, DATA);

	if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
		unsigned start = pos & (PAGE_CACHE_SIZE - 1);
		unsigned end = start + len;

		/* reading beyond i_size is simple: memset to zero */
		zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE);
		goto out;
	}

	if (dn.data_blkaddr == NEW_ADDR) {
		/* reserved but never written: the block reads as zeroes */
		zero_user_segment(page, 0, PAGE_CACHE_SIZE);
	} else {
		struct f2fs_io_info fio = {
			.sbi = sbi,
			.type = DATA,
			.rw = READ_SYNC,
			.blk_addr = dn.data_blkaddr,
			.page = page,
			.encrypted_page = NULL,
		};

		ret = f2fs_submit_page_bio(&fio);
		if (ret)
			goto fail;

		lock_page(page);
		if (unlikely(!PageUptodate(page))) {
			f2fs_put_page(page, 1);
			ret = -EIO;
			goto fail;
		}
		if (unlikely(page->mapping != mapping)) {
			/* the page was truncated under us: retry */
			f2fs_put_page(page, 1);
			goto repeat;
		}

		/* avoid decrypting a symlink page */
		if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
			ret = f2fs_decrypt_one(inode, page);
			if (ret) {
				f2fs_put_page(page, 1);
				goto fail;
			}
		}
	}
out:
	SetPageUptodate(page);
	clear_cold_data(page);
	return 0;

put_fail:
	f2fs_put_dnode(&dn);
unlock_fail:
	f2fs_unlock_op(sbi);
	f2fs_put_page(page, 1);
fail:
	f2fs_write_failed(mapping, pos + len);
	return ret;
}

1827 1828 1829 1830 1831 1832 1833
static int f2fs_write_end(struct file *file,
			struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	struct inode *inode = page->mapping->host;

1834 1835
	trace_f2fs_write_end(inode, pos, len, copied);

1836
	set_page_dirty(page);
1837 1838 1839 1840 1841 1842 1843

	if (pos + copied > i_size_read(inode)) {
		i_size_write(inode, pos + copied);
		mark_inode_dirty(inode);
		update_inode_page(inode);
	}

1844
	f2fs_put_page(page, 1);
1845 1846 1847
	return copied;
}

1848 1849
/*
 * Validate alignment for direct I/O.  Reads always pass (they fall
 * back gracefully); writes require both the file offset and the user
 * buffers to be block-aligned.
 */
static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
			   loff_t offset)
{
	unsigned blocksize_mask = inode->i_sb->s_blocksize - 1;

	if (iov_iter_rw(iter) == READ)
		return 0;

	if ((offset | iov_iter_alignment(iter)) & blocksize_mask)
		return -EINVAL;

	return 0;
}

1865 1866
/*
 * ->direct_IO: hand aligned requests to blockdev_direct_IO(),
 * preallocating target blocks for writes; returning 0 makes the VFS
 * fall back to buffered I/O.
 */
static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
			      loff_t offset)
{
	struct file *file = iocb->ki_filp;
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	size_t count = iov_iter_count(iter);
	int ret;

	/* inline data cannot be accessed directly: convert it first */
	if (f2fs_has_inline_data(inode)) {
		ret = f2fs_convert_inline_inode(inode);
		if (ret)
			return ret;
	}

	/* encrypted regular files fall back to buffered I/O */
	if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
		return 0;

	/* so do misaligned requests */
	if (check_direct_IO(inode, iter, offset))
		return 0;

	trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));

	/* preallocate the target range so the writer hits the fast path */
	if (iov_iter_rw(iter) == WRITE)
		__allocate_data_blocks(inode, offset, count);

	ret = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block);
	if (ret < 0 && iov_iter_rw(iter) == WRITE)
		f2fs_write_failed(mapping, offset + count);

	trace_f2fs_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), ret);

	return ret;
}

1901 1902
void f2fs_invalidate_page(struct page *page, unsigned int offset,
							unsigned int length)
1903 1904
{
	struct inode *inode = page->mapping->host;
1905
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1906

1907 1908
	if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
		(offset % PAGE_CACHE_SIZE || length != PAGE_CACHE_SIZE))
1909 1910
		return;

1911 1912 1913 1914 1915 1916 1917 1918
	if (PageDirty(page)) {
		if (inode->i_ino == F2FS_META_INO(sbi))
			dec_page_count(sbi, F2FS_DIRTY_META);
		else if (inode->i_ino == F2FS_NODE_INO(sbi))
			dec_page_count(sbi, F2FS_DIRTY_NODES);
		else
			inode_dec_dirty_pages(inode);
	}
1919 1920 1921
	ClearPagePrivate(page);
}

1922
int f2fs_release_page(struct page *page, gfp_t wait)
1923
{
1924 1925 1926 1927
	/* If this is dirty page, keep PagePrivate */
	if (PageDirty(page))
		return 0;

1928
	ClearPagePrivate(page);
1929
	return 1;
1930 1931 1932 1933 1934 1935 1936
}

static int f2fs_set_data_page_dirty(struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct inode *inode = mapping->host;

1937 1938
	trace_f2fs_set_page_dirty(page, DATA);

1939
	SetPageUptodate(page);
1940

1941
	if (f2fs_is_atomic_file(inode)) {
1942 1943 1944 1945
		register_inmem_page(inode, page);
		return 1;
	}

1946 1947
	if (!PageDirty(page)) {
		__set_page_dirty_nobuffers(page);
1948
		update_dirty_page(inode, page);
1949 1950 1951 1952 1953
		return 1;
	}
	return 0;
}

J
Jaegeuk Kim 已提交
1954 1955
/* ->bmap: needs real block addresses, so force inline data out first. */
static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
{
	struct inode *inode = mapping->host;

	if (f2fs_has_inline_data(inode)) {
		int ret = f2fs_convert_inline_inode(inode);

		if (ret)
			return ret;
	}
	return generic_block_bmap(mapping, block, get_data_block);
}

1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997
/* Initialize the per-sb extent cache: radix tree, LRU list, locks and counters. */
void init_extent_cache_info(struct f2fs_sb_info *sbi)
{
	INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO);
	init_rwsem(&sbi->extent_tree_lock);
	INIT_LIST_HEAD(&sbi->extent_list);
	spin_lock_init(&sbi->extent_lock);
	sbi->total_ext_tree = 0;
	atomic_set(&sbi->total_ext_node, 0);
}

/*
 * Create the slab caches backing the extent cache.
 * Returns 0 on success or -ENOMEM, releasing any partial allocation.
 */
int __init create_extent_cache(void)
{
	extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
			sizeof(struct extent_tree));
	if (!extent_tree_slab)
		goto fail;
	extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
			sizeof(struct extent_node));
	if (!extent_node_slab)
		goto free_tree;
	return 0;

free_tree:
	kmem_cache_destroy(extent_tree_slab);
fail:
	return -ENOMEM;
}

/* Tear down the extent cache slabs, nodes before trees (reverse of create). */
void destroy_extent_cache(void)
{
	kmem_cache_destroy(extent_node_slab);
	kmem_cache_destroy(extent_tree_slab);
}

1998 1999 2000 2001 2002 2003
const struct address_space_operations f2fs_dblock_aops = {
	.readpage	= f2fs_read_data_page,
	.readpages	= f2fs_read_data_pages,
	.writepage	= f2fs_write_data_page,
	.writepages	= f2fs_write_data_pages,
	.write_begin	= f2fs_write_begin,
2004
	.write_end	= f2fs_write_end,
2005
	.set_page_dirty	= f2fs_set_data_page_dirty,
2006 2007
	.invalidatepage	= f2fs_invalidate_page,
	.releasepage	= f2fs_release_page,
2008
	.direct_IO	= f2fs_direct_IO,
J
Jaegeuk Kim 已提交
2009
	.bmap		= f2fs_bmap,
2010
};