Merge tag 'for-f2fs-4.11' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs

Pull f2fs updates from Jaegeuk Kim: "This round introduces several interesting features such as on-disk NAT bitmaps, IO alignment, and a discard thread. And it includes a couple of major bug fixes as below. Enhancements: - introduce on-disk bitmaps to avoid scanning NAT blocks when getting free nids - support IO alignment to prepare open-channel SSD integration in future - introduce a discard thread to avoid long latency during checkpoint and fstrim - use SSR for warm node and enable inline_xattr by default - introduce in-memory bitmaps to check FS consistency for debugging - improve write_begin by avoiding needless read IO Bug fixes: - fix broken zone_reset behavior for SMR drive - fix wrong victim selection policy during GC - fix missing behavior when preparing discard commands - fix bugs in atomic write support and fiemap - workaround to handle multiple f2fs_add_link calls having same name ... and it includes a bunch of clean-up patches as well" * tag 'for-f2fs-4.11' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (97 commits) f2fs: avoid to flush nat journal entries f2fs: avoid to issue redundant discard commands f2fs: fix a plint compile warning f2fs: add f2fs_drop_inode tracepoint f2fs: Fix zoned block device support f2fs: remove redundant set_page_dirty() f2fs: fix to enlarge size of write_io_dummy mempool f2fs: fix memory leak of write_io_dummy mempool during umount f2fs: fix to update F2FS_{CP_}WB_DATA count correctly f2fs: use MAX_FREE_NIDS for the free nids target f2fs: introduce free nid bitmap f2fs: new helper cur_cp_crc() getting crc in f2fs_checkpoint f2fs: update the comment of default nr_pages to skipping f2fs: drop the duplicate pval in f2fs_getxattr f2fs: Don't update the xattr data that same as the exist f2fs: kill __is_extent_same f2fs: avoid bggc->fggc when enough free segments are avaliable after cp f2fs: select target segment with closer temperature in SSR mode f2fs: show simple call stack in fault injection message f2fs: no need lock_op in f2fs_write_inline_data ...

Merge tag 'for-f2fs-4.11' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim: "This round introduces several interesting features such as on-disk NAT bitmaps, IO alignment, and a discard thread. And it includes a couple of major bug fixes as below. Enhancements: - introduce on-disk bitmaps to avoid scanning NAT blocks when getting free nids - support IO alignment to prepare open-channel SSD integration in future - introduce a discard thread to avoid long latency during checkpoint and fstrim - use SSR for warm node and enable inline_xattr by default - introduce in-memory bitmaps to check FS consistency for debugging - improve write_begin by avoiding needless read IO Bug fixes: - fix broken zone_reset behavior for SMR drive - fix wrong victim selection policy during GC - fix missing behavior when preparing discard commands - fix bugs in atomic write support and fiemap - workaround to handle multiple f2fs_add_link calls having same name ... and it includes a bunch of clean-up patches as well" * tag 'for-f2fs-4.11' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (97 commits) f2fs: avoid to flush nat journal entries f2fs: avoid to issue redundant discard commands f2fs: fix a plint compile warning f2fs: add f2fs_drop_inode tracepoint f2fs: Fix zoned block device support f2fs: remove redundant set_page_dirty() f2fs: fix to enlarge size of write_io_dummy mempool f2fs: fix memory leak of write_io_dummy mempool during umount f2fs: fix to update F2FS_{CP_}WB_DATA count correctly f2fs: use MAX_FREE_NIDS for the free nids target f2fs: introduce free nid bitmap f2fs: new helper cur_cp_crc() getting crc in f2fs_checkpoint f2fs: update the comment of default nr_pages to skipping f2fs: drop the duplicate pval in f2fs_getxattr f2fs: Don't update the xattr data that same as the exist f2fs: kill __is_extent_same f2fs: avoid bggc->fggc when enough free segments are avaliable after cp f2fs: select target segment with closer temperature in SSR mode f2fs: show simple call stack in fault injection message f2fs: no need lock_op in f2fs_write_inline_data ...
25c4e6c3 · Linus Torvalds · 6053dc98 · 900f7362 · 25c4e6c3 · 25c4e6c3
21 changed file
--- a/Documentation/filesystems/f2fs.txt
+++ b/Documentation/filesystems/f2fs.txt
@@ -125,13 +125,14 @@ active_logs=%u         Support configuring the number of active logs. In the
 disable_ext_identify   Disable the extension list configured by mkfs, so f2fs
                       does not aware of cold files such as media files.
 inline_xattr           Enable the inline xattrs feature.
+noinline_xattr         Disable the inline xattrs feature.
 inline_data            Enable the inline data feature: New created small(<~3.4k)
                       files can be written into inode block.
 inline_dentry          Enable the inline dir feature: data in new created
                       directory entries can be written into inode block. The
                       space of inode block which is used to store inline
                       dentries is limited to ~3.4k.
-noinline_dentry        Diable the inline dentry feature.
+noinline_dentry        Disable the inline dentry feature.
 flush_merge	       Merge concurrent cache_flush commands as much as possible
                       to eliminate redundant command issues. If the underlying
 		       device handles the cache_flush command relatively slowly,
@@ -157,6 +158,8 @@ data_flush             Enable data flushing before checkpoint in order to
 mode=%s                Control block allocation mode which supports "adaptive"
                       and "lfs". In "lfs" mode, there should be no random
                       writes towards main area.
+io_bits=%u             Set the bit size of write IO requests. It should be set
+                       with "mode=lfs".

 ================================================================================
 DEBUGFS ENTRIES
@@ -174,7 +177,7 @@ f2fs. Each file shows the whole f2fs information.
 SYSFS ENTRIES
 ================================================================================

-Information about mounted f2f2 file systems can be found in
+Information about mounted f2fs file systems can be found in
 /sys/fs/f2fs.  Each mounted filesystem will have a directory in
 /sys/fs/f2fs based on its device name (i.e., /sys/fs/f2fs/sda).
 The files in each per-device directory are shown in table below.

--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -249,7 +249,8 @@ static int f2fs_write_meta_page(struct page *page,
 	dec_page_count(sbi, F2FS_DIRTY_META);

 	if (wbc->for_reclaim)
-		f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, META, WRITE);
+		f2fs_submit_merged_bio_cond(sbi, page->mapping->host,
+						0, page->index, META, WRITE);

 	unlock_page(page);

@@ -493,6 +494,7 @@ int acquire_orphan_inode(struct f2fs_sb_info *sbi)
 #ifdef CONFIG_F2FS_FAULT_INJECTION
 	if (time_to_inject(sbi, FAULT_ORPHAN)) {
 		spin_unlock(&im->ino_lock);
+		f2fs_show_injection_info(FAULT_ORPHAN);
 		return -ENOSPC;
 	}
 #endif
@@ -681,8 +683,7 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
 		return -EINVAL;
 	}

-	crc = le32_to_cpu(*((__le32 *)((unsigned char *)*cp_block
-							+ crc_offset)));
+	crc = cur_cp_crc(*cp_block);
 	if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) {
 		f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value");
 		return -EINVAL;
@@ -891,7 +892,7 @@ int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
 				F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
 		return 0;
 	}
-	fi = list_entry(head->next, struct f2fs_inode_info, dirty_list);
+	fi = list_first_entry(head, struct f2fs_inode_info, dirty_list);
 	inode = igrab(&fi->vfs_inode);
 	spin_unlock(&sbi->inode_lock[type]);
 	if (inode) {
@@ -924,7 +925,7 @@ int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi)
 			spin_unlock(&sbi->inode_lock[DIRTY_META]);
 			return 0;
 		}
-		fi = list_entry(head->next, struct f2fs_inode_info,
+		fi = list_first_entry(head, struct f2fs_inode_info,
 							gdirty_list);
 		inode = igrab(&fi->vfs_inode);
 		spin_unlock(&sbi->inode_lock[DIRTY_META]);
@@ -998,8 +999,6 @@ static int block_operations(struct f2fs_sb_info *sbi)
 static void unblock_operations(struct f2fs_sb_info *sbi)
 {
 	up_write(&sbi->node_write);
-
-	build_free_nids(sbi, false);
 	f2fs_unlock_all(sbi);
 }

@@ -1025,6 +1024,10 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)

 	spin_lock(&sbi->cp_lock);

+	if (cpc->reason == CP_UMOUNT && ckpt->cp_pack_total_block_count >
+			sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks)
+		disable_nat_bits(sbi, false);
+
 	if (cpc->reason == CP_UMOUNT)
 		__set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
 	else
@@ -1137,6 +1140,28 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)

 	start_blk = __start_cp_next_addr(sbi);

+	/* write nat bits */
+	if (enabled_nat_bits(sbi, cpc)) {
+		__u64 cp_ver = cur_cp_version(ckpt);
+		unsigned int i;
+		block_t blk;
+
+		cp_ver |= ((__u64)crc32 << 32);
+		*(__le64 *)nm_i->nat_bits = cpu_to_le64(cp_ver);
+
+		blk = start_blk + sbi->blocks_per_seg - nm_i->nat_bits_blocks;
+		for (i = 0; i < nm_i->nat_bits_blocks; i++)
+			update_meta_page(sbi, nm_i->nat_bits +
+					(i << F2FS_BLKSIZE_BITS), blk + i);
+
+		/* Flush all the NAT BITS pages */
+		while (get_pages(sbi, F2FS_DIRTY_META)) {
+			sync_meta_pages(sbi, META, LONG_MAX);
+			if (unlikely(f2fs_cp_error(sbi)))
+				return -EIO;
+		}
+	}
+
 	/* need to wait for end_io results */
 	wait_on_all_pages_writeback(sbi);
 	if (unlikely(f2fs_cp_error(sbi)))
@@ -1248,15 +1273,20 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	f2fs_flush_merged_bios(sbi);

 	/* this is the case of multiple fstrims without any changes */
-	if (cpc->reason == CP_DISCARD && !is_sbi_flag_set(sbi, SBI_IS_DIRTY)) {
-		f2fs_bug_on(sbi, NM_I(sbi)->dirty_nat_cnt);
-		f2fs_bug_on(sbi, SIT_I(sbi)->dirty_sentries);
-		f2fs_bug_on(sbi, prefree_segments(sbi));
-		flush_sit_entries(sbi, cpc);
-		clear_prefree_segments(sbi, cpc);
-		f2fs_wait_all_discard_bio(sbi);
-		unblock_operations(sbi);
-		goto out;
+	if (cpc->reason == CP_DISCARD) {
+		if (!exist_trim_candidates(sbi, cpc)) {
+			unblock_operations(sbi);
+			goto out;
+		}
+
+		if (NM_I(sbi)->dirty_nat_cnt == 0 &&
+				SIT_I(sbi)->dirty_sentries == 0 &&
+				prefree_segments(sbi) == 0) {
+			flush_sit_entries(sbi, cpc);
+			clear_prefree_segments(sbi, cpc);
+			unblock_operations(sbi);
+			goto out;
+		}
 	}

 	/*
@@ -1268,17 +1298,15 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);

 	/* write cached NAT/SIT entries to NAT/SIT area */
-	flush_nat_entries(sbi);
+	flush_nat_entries(sbi, cpc);
 	flush_sit_entries(sbi, cpc);

 	/* unlock all the fs_lock[] in do_checkpoint() */
 	err = do_checkpoint(sbi, cpc);
-	if (err) {
+	if (err)
 		release_discard_addrs(sbi);
-	} else {
+	else
 		clear_prefree_segments(sbi, cpc);
-		f2fs_wait_all_discard_bio(sbi);
-	}

 	unblock_operations(sbi);
 	stat_inc_cp_count(sbi->stat_info);

--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -55,8 +55,10 @@ static void f2fs_read_end_io(struct bio *bio)
 	int i;

 #ifdef CONFIG_F2FS_FAULT_INJECTION
-	if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO))
+	if (time_to_inject(F2FS_P_SB(bio->bi_io_vec->bv_page), FAULT_IO)) {
+		f2fs_show_injection_info(FAULT_IO);
 		bio->bi_error = -EIO;
+	}
 #endif

 	if (f2fs_bio_encrypted(bio)) {
@@ -93,6 +95,17 @@ static void f2fs_write_end_io(struct bio *bio)
 		struct page *page = bvec->bv_page;
 		enum count_type type = WB_DATA_TYPE(page);

+		if (IS_DUMMY_WRITTEN_PAGE(page)) {
+			set_page_private(page, (unsigned long)NULL);
+			ClearPagePrivate(page);
+			unlock_page(page);
+			mempool_free(page, sbi->write_io_dummy);
+
+			if (unlikely(bio->bi_error))
+				f2fs_stop_checkpoint(sbi, true);
+			continue;
+		}
+
 		fscrypt_pullback_bio_page(&page, true);

 		if (unlikely(bio->bi_error)) {
@@ -171,10 +184,46 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi,
 				struct bio *bio, enum page_type type)
 {
 	if (!is_read_io(bio_op(bio))) {
+		unsigned int start;
+
 		if (f2fs_sb_mounted_blkzoned(sbi->sb) &&
 			current->plug && (type == DATA || type == NODE))
 			blk_finish_plug(current->plug);
+
+		if (type != DATA && type != NODE)
+			goto submit_io;
+
+		start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
+		start %= F2FS_IO_SIZE(sbi);
+
+		if (start == 0)
+			goto submit_io;
+
+		/* fill dummy pages */
+		for (; start < F2FS_IO_SIZE(sbi); start++) {
+			struct page *page =
+				mempool_alloc(sbi->write_io_dummy,
+					GFP_NOIO | __GFP_ZERO | __GFP_NOFAIL);
+			f2fs_bug_on(sbi, !page);
+
+			SetPagePrivate(page);
+			set_page_private(page, (unsigned long)DUMMY_WRITTEN_PAGE);
+			lock_page(page);
+			if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
+				f2fs_bug_on(sbi, 1);
+		}
+		/*
+		 * In the NODE case, we lose next block address chain. So, we
+		 * need to do checkpoint in f2fs_sync_file.
+		 */
+		if (type == NODE)
+			set_sbi_flag(sbi, SBI_NEED_CP);
 	}
+submit_io:
+	if (is_read_io(bio_op(bio)))
+		trace_f2fs_submit_read_bio(sbi->sb, type, bio);
+	else
+		trace_f2fs_submit_write_bio(sbi->sb, type, bio);
 	submit_bio(bio);
 }

@@ -185,19 +234,19 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
 	if (!io->bio)
 		return;

+	bio_set_op_attrs(io->bio, fio->op, fio->op_flags);
+
 	if (is_read_io(fio->op))
-		trace_f2fs_submit_read_bio(io->sbi->sb, fio, io->bio);
+		trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
 	else
-		trace_f2fs_submit_write_bio(io->sbi->sb, fio, io->bio);
-
-	bio_set_op_attrs(io->bio, fio->op, fio->op_flags);
+		trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio);

 	__submit_bio(io->sbi, io->bio, fio->type);
 	io->bio = NULL;
 }

-static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
-						struct page *page, nid_t ino)
+static bool __has_merged_page(struct f2fs_bio_info *io,
+				struct inode *inode, nid_t ino, pgoff_t idx)
 {
 	struct bio_vec *bvec;
 	struct page *target;
@@ -206,7 +255,7 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
 	if (!io->bio)
 		return false;

-	if (!inode && !page && !ino)
+	if (!inode && !ino)
 		return true;

 	bio_for_each_segment_all(bvec, io->bio, i) {
@@ -216,10 +265,11 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
 		else
 			target = fscrypt_control_page(bvec->bv_page);

+		if (idx != target->index)
+			continue;
+
 		if (inode && inode == target->mapping->host)
 			return true;
-		if (page && page == target)
-			return true;
 		if (ino && ino == ino_of_node(target))
 			return true;
 	}
@@ -228,22 +278,21 @@ static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
 }

 static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode,
-						struct page *page, nid_t ino,
-						enum page_type type)
+				nid_t ino, pgoff_t idx, enum page_type type)
 {
 	enum page_type btype = PAGE_TYPE_OF_BIO(type);
 	struct f2fs_bio_info *io = &sbi->write_io[btype];
 	bool ret;

 	down_read(&io->io_rwsem);
-	ret = __has_merged_page(io, inode, page, ino);
+	ret = __has_merged_page(io, inode, ino, idx);
 	up_read(&io->io_rwsem);
 	return ret;
 }

 static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
-				struct inode *inode, struct page *page,
-				nid_t ino, enum page_type type, int rw)
+				struct inode *inode, nid_t ino, pgoff_t idx,
+				enum page_type type, int rw)
 {
 	enum page_type btype = PAGE_TYPE_OF_BIO(type);
 	struct f2fs_bio_info *io;
@@ -252,16 +301,16 @@ static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,

 	down_write(&io->io_rwsem);

-	if (!__has_merged_page(io, inode, page, ino))
+	if (!__has_merged_page(io, inode, ino, idx))
 		goto out;

 	/* change META to META_FLUSH in the checkpoint procedure */
 	if (type >= META_FLUSH) {
 		io->fio.type = META_FLUSH;
 		io->fio.op = REQ_OP_WRITE;
-		io->fio.op_flags = REQ_PREFLUSH | REQ_META | REQ_PRIO;
+		io->fio.op_flags = REQ_META | REQ_PRIO;
 		if (!test_opt(sbi, NOBARRIER))
-			io->fio.op_flags |= REQ_FUA;
+			io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA;
 	}
 	__submit_merged_bio(io);
 out:
@@ -271,15 +320,15 @@ static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
 void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, enum page_type type,
 									int rw)
 {
-	__f2fs_submit_merged_bio(sbi, NULL, NULL, 0, type, rw);
+	__f2fs_submit_merged_bio(sbi, NULL, 0, 0, type, rw);
 }

 void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *sbi,
-				struct inode *inode, struct page *page,
-				nid_t ino, enum page_type type, int rw)
+				struct inode *inode, nid_t ino, pgoff_t idx,
+				enum page_type type, int rw)
 {
-	if (has_merged_page(sbi, inode, page, ino, type))
-		__f2fs_submit_merged_bio(sbi, inode, page, ino, type, rw);
+	if (has_merged_page(sbi, inode, ino, idx, type))
+		__f2fs_submit_merged_bio(sbi, inode, ino, idx, type, rw);
 }

 void f2fs_flush_merged_bios(struct f2fs_sb_info *sbi)
@@ -315,13 +364,14 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
 	return 0;
 }

-void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
+int f2fs_submit_page_mbio(struct f2fs_io_info *fio)
 {
 	struct f2fs_sb_info *sbi = fio->sbi;
 	enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
 	struct f2fs_bio_info *io;
 	bool is_read = is_read_io(fio->op);
 	struct page *bio_page;
+	int err = 0;

 	io = is_read ? &sbi->read_io : &sbi->write_io[btype];

@@ -331,6 +381,9 @@ void f2fs_submit_page_mbio(struct f2fs_io_info *fio)

 	bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;

+	/* set submitted = 1 as a return value */
+	fio->submitted = 1;
+
 	if (!is_read)
 		inc_page_count(sbi, WB_DATA_TYPE(bio_page));

@@ -342,6 +395,13 @@ void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
 		__submit_merged_bio(io);
 alloc_new:
 	if (io->bio == NULL) {
+		if ((fio->type == DATA || fio->type == NODE) &&
+				fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
+			err = -EAGAIN;
+			if (!is_read)
+				dec_page_count(sbi, WB_DATA_TYPE(bio_page));
+			goto out_fail;
+		}
 		io->bio = __bio_alloc(sbi, fio->new_blkaddr,
 						BIO_MAX_PAGES, is_read);
 		io->fio = *fio;
@@ -355,9 +415,10 @@ void f2fs_submit_page_mbio(struct f2fs_io_info *fio)

 	io->last_block_in_bio = fio->new_blkaddr;
 	f2fs_trace_ios(fio, 0);
-
+out_fail:
 	up_write(&io->io_rwsem);
 	trace_f2fs_submit_page_mbio(fio->page, fio);
+	return err;
 }

 static void __set_data_blkaddr(struct dnode_of_data *dn)
@@ -453,7 +514,7 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)

 int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
 {
-	struct extent_info ei;
+	struct extent_info ei  = {0,0,0};
 	struct inode *inode = dn->inode;

 	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
@@ -470,7 +531,7 @@ struct page *get_read_data_page(struct inode *inode, pgoff_t index,
 	struct address_space *mapping = inode->i_mapping;
 	struct dnode_of_data dn;
 	struct page *page;
-	struct extent_info ei;
+	struct extent_info ei = {0,0,0};
 	int err;
 	struct f2fs_io_info fio = {
 		.sbi = F2FS_I_SB(inode),
@@ -694,6 +755,9 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
 	struct f2fs_map_blocks map;
 	int err = 0;

+	if (is_inode_flag_set(inode, FI_NO_PREALLOC))
+		return 0;
+
 	map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
 	map.m_len = F2FS_BYTES_TO_BLK(iocb->ki_pos + iov_iter_count(from));
 	if (map.m_len > map.m_lblk)
@@ -742,7 +806,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
 	int err = 0, ofs = 1;
 	unsigned int ofs_in_node, last_ofs_in_node;
 	blkcnt_t prealloc;
-	struct extent_info ei;
+	struct extent_info ei = {0,0,0};
 	block_t blkaddr;

 	if (!maxblocks)
@@ -806,7 +870,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
 			}
 			if (err)
 				goto sync_out;
-			map->m_flags = F2FS_MAP_NEW;
+			map->m_flags |= F2FS_MAP_NEW;
 			blkaddr = dn.data_blkaddr;
 		} else {
 			if (flag == F2FS_GET_BLOCK_BMAP) {
@@ -906,7 +970,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
 	if (!err) {
 		map_bh(bh, inode->i_sb, map.m_pblk);
 		bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
-		bh->b_size = map.m_len << inode->i_blkbits;
+		bh->b_size = (u64)map.m_len << inode->i_blkbits;
 	}
 	return err;
 }
@@ -1088,7 +1152,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping,

 		prefetchw(&page->flags);
 		if (pages) {
-			page = list_entry(pages->prev, struct page, lru);
+			page = list_last_entry(pages, struct page, lru);
 			list_del(&page->lru);
 			if (add_to_page_cache_lru(page, mapping,
 						  page->index,
@@ -1207,7 +1271,7 @@ static int f2fs_read_data_pages(struct file *file,
 			struct list_head *pages, unsigned nr_pages)
 {
 	struct inode *inode = file->f_mapping->host;
-	struct page *page = list_entry(pages->prev, struct page, lru);
+	struct page *page = list_last_entry(pages, struct page, lru);

 	trace_f2fs_readpages(inode, page, nr_pages);

@@ -1288,8 +1352,8 @@ int do_write_data_page(struct f2fs_io_info *fio)
 	return err;
 }

-static int f2fs_write_data_page(struct page *page,
-					struct writeback_control *wbc)
+static int __write_data_page(struct page *page, bool *submitted,
+				struct writeback_control *wbc)
 {
 	struct inode *inode = page->mapping->host;
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -1307,6 +1371,7 @@ static int f2fs_write_data_page(struct page *page,
 		.op_flags = wbc_to_write_flags(wbc),
 		.page = page,
 		.encrypted_page = NULL,
+		.submitted = false,
 	};

 	trace_f2fs_writepage(page, DATA);
@@ -1352,9 +1417,12 @@ static int f2fs_write_data_page(struct page *page,
 		goto redirty_out;

 	err = -EAGAIN;
-	f2fs_lock_op(sbi);
-	if (f2fs_has_inline_data(inode))
+	if (f2fs_has_inline_data(inode)) {
 		err = f2fs_write_inline_data(inode, page);
+		if (!err)
+			goto out;
+	}
+	f2fs_lock_op(sbi);
 	if (err == -EAGAIN)
 		err = do_write_data_page(&fio);
 	if (F2FS_I(inode)->last_disk_size < psize)
@@ -1370,15 +1438,22 @@ static int f2fs_write_data_page(struct page *page,
 		ClearPageUptodate(page);

 	if (wbc->for_reclaim) {
-		f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, DATA, WRITE);
+		f2fs_submit_merged_bio_cond(sbi, inode, 0, page->index,
+						DATA, WRITE);
 		remove_dirty_inode(inode);
+		submitted = NULL;
 	}

 	unlock_page(page);
 	f2fs_balance_fs(sbi, need_balance_fs);

-	if (unlikely(f2fs_cp_error(sbi)))
+	if (unlikely(f2fs_cp_error(sbi))) {
 		f2fs_submit_merged_bio(sbi, DATA, WRITE);
+		submitted = NULL;
+	}
+
+	if (submitted)
+		*submitted = fio.submitted;

 	return 0;

@@ -1390,6 +1465,12 @@ static int f2fs_write_data_page(struct page *page,
 	return err;
 }

+static int f2fs_write_data_page(struct page *page,
+					struct writeback_control *wbc)
+{
+	return __write_data_page(page, NULL, wbc);
+}
+
 /*
 * This function was copied from write_cche_pages from mm/page-writeback.c.
 * The major change is making write step of cold data page separately from
@@ -1406,10 +1487,10 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
 	pgoff_t index;
 	pgoff_t end;		/* Inclusive */
 	pgoff_t done_index;
+	pgoff_t last_idx = ULONG_MAX;
 	int cycled;
 	int range_whole = 0;
 	int tag;
-	int nwritten = 0;

 	pagevec_init(&pvec, 0);

@@ -1446,6 +1527,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping,

 		for (i = 0; i < nr_pages; i++) {
 			struct page *page = pvec.pages[i];
+			bool submitted = false;

 			if (page->index > end) {
 				done = 1;
@@ -1479,7 +1561,7 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
 			if (!clear_page_dirty_for_io(page))
 				goto continue_unlock;

-			ret = mapping->a_ops->writepage(page, wbc);
+			ret = __write_data_page(page, &submitted, wbc);
 			if (unlikely(ret)) {
 				/*
 				 * keep nr_to_write, since vfs uses this to
@@ -1493,8 +1575,8 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
 				done_index = page->index + 1;
 				done = 1;
 				break;
-			} else {
-				nwritten++;
+			} else if (submitted) {
+				last_idx = page->index;
 			}

 			if (--wbc->nr_to_write <= 0 &&
@@ -1516,9 +1598,9 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
 	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
 		mapping->writeback_index = done_index;

-	if (nwritten)
+	if (last_idx != ULONG_MAX)
 		f2fs_submit_merged_bio_cond(F2FS_M_SB(mapping), mapping->host,
-							NULL, 0, DATA, WRITE);
+						0, last_idx, DATA, WRITE);

 	return ret;
 }
@@ -1591,14 +1673,15 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
 	struct dnode_of_data dn;
 	struct page *ipage;
 	bool locked = false;
-	struct extent_info ei;
+	struct extent_info ei = {0,0,0};
 	int err = 0;

 	/*
 	 * we already allocated all the blocks, so we don't need to get
 	 * the block addresses when there is no need to fill the page.
 	 */
-	if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE)
+	if (!f2fs_has_inline_data(inode) && len == PAGE_SIZE &&
+			!is_inode_flag_set(inode, FI_NO_PREALLOC))
 		return 0;

 	if (f2fs_has_inline_data(inode) ||
@@ -1682,7 +1765,12 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
 			goto fail;
 	}
 repeat:
-	page = grab_cache_page_write_begin(mapping, index, flags);
+	/*
+	 * Do not use grab_cache_page_write_begin() to avoid deadlock due to
+	 * wait_for_stable_page. Will wait that below with our IO control.
+	 */
+	page = pagecache_get_page(mapping, index,
+				FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS);
 	if (!page) {
 		err = -ENOMEM;
 		goto fail;
@@ -1715,6 +1803,11 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
 	if (len == PAGE_SIZE || PageUptodate(page))
 		return 0;

+	if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode)) {
+		zero_user_segment(page, len, PAGE_SIZE);
+		return 0;
+	}
+
 	if (blkaddr == NEW_ADDR) {
 		zero_user_segment(page, 0, PAGE_SIZE);
 		SetPageUptodate(page);
@@ -1768,7 +1861,7 @@ static int f2fs_write_end(struct file *file,
 	 * let generic_perform_write() try to copy data again through copied=0.
 	 */
 	if (!PageUptodate(page)) {
-		if (unlikely(copied != PAGE_SIZE))
+		if (unlikely(copied != len))
 			copied = 0;
 		else
 			SetPageUptodate(page);
@@ -1917,7 +2010,7 @@ static int f2fs_set_data_page_dirty(struct page *page)
 	if (!PageUptodate(page))
 		SetPageUptodate(page);

-	if (f2fs_is_atomic_file(inode)) {
+	if (f2fs_is_atomic_file(inode) && !f2fs_is_commit_atomic_write(inode)) {
 		if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
 			register_inmem_page(inode, page);
 			return 1;

--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -50,8 +50,16 @@ static void update_general_status(struct f2fs_sb_info *sbi)
 	si->ndirty_files = sbi->ndirty_inode[FILE_INODE];
 	si->ndirty_all = sbi->ndirty_inode[DIRTY_META];
 	si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES);
+	si->aw_cnt = atomic_read(&sbi->aw_cnt);
+	si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt);
 	si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA);
 	si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA);
+	if (SM_I(sbi) && SM_I(sbi)->fcc_info)
+		si->nr_flush =
+			atomic_read(&SM_I(sbi)->fcc_info->submit_flush);
+	if (SM_I(sbi) && SM_I(sbi)->dcc_info)
+		si->nr_discard =
+			atomic_read(&SM_I(sbi)->dcc_info->submit_discard);
 	si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg;
 	si->rsvd_segs = reserved_segments(sbi);
 	si->overp_segs = overprovision_segments(sbi);
@@ -62,6 +70,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
 	si->inline_xattr = atomic_read(&sbi->inline_xattr);
 	si->inline_inode = atomic_read(&sbi->inline_inode);
 	si->inline_dir = atomic_read(&sbi->inline_dir);
+	si->append = sbi->im[APPEND_INO].ino_num;
+	si->update = sbi->im[UPDATE_INO].ino_num;
 	si->orphans = sbi->im[ORPHAN_INO].ino_num;
 	si->utilization = utilization(sbi);

@@ -183,6 +193,9 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
 	/* build nm */
 	si->base_mem += sizeof(struct f2fs_nm_info);
 	si->base_mem += __bitmap_size(sbi, NAT_BITMAP);
+	si->base_mem += (NM_I(sbi)->nat_bits_blocks << F2FS_BLKSIZE_BITS);
+	si->base_mem += NM_I(sbi)->nat_blocks * NAT_ENTRY_BITMAP_SIZE;
+	si->base_mem += NM_I(sbi)->nat_blocks / 8;

 get_cache:
 	si->cache_mem = 0;
@@ -192,8 +205,10 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
 		si->cache_mem += sizeof(struct f2fs_gc_kthread);

 	/* build merge flush thread */
-	if (SM_I(sbi)->cmd_control_info)
+	if (SM_I(sbi)->fcc_info)
 		si->cache_mem += sizeof(struct flush_cmd_control);
+	if (SM_I(sbi)->dcc_info)
+		si->cache_mem += sizeof(struct discard_cmd_control);

 	/* free nids */
 	si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID_LIST] +
@@ -254,8 +269,8 @@ static int stat_show(struct seq_file *s, void *v)
 			   si->inline_inode);
 		seq_printf(s, "  - Inline_dentry Inode: %u\n",
 			   si->inline_dir);
-		seq_printf(s, "  - Orphan Inode: %u\n",
-			   si->orphans);
+		seq_printf(s, "  - Orphan/Append/Update Inode: %u, %u, %u\n",
+			   si->orphans, si->append, si->update);
 		seq_printf(s, "\nMain area: %d segs, %d secs %d zones\n",
 			   si->main_area_segs, si->main_area_sections,
 			   si->main_area_zones);
@@ -314,8 +329,11 @@ static int stat_show(struct seq_file *s, void *v)
 		seq_printf(s, "  - Inner Struct Count: tree: %d(%d), node: %d\n",
 				si->ext_tree, si->zombie_tree, si->ext_node);
 		seq_puts(s, "\nBalancing F2FS Async:\n");
-		seq_printf(s, "  - inmem: %4d, wb_cp_data: %4d, wb_data: %4d\n",
-			   si->inmem_pages, si->nr_wb_cp_data, si->nr_wb_data);
+		seq_printf(s, "  - IO (CP: %4d, Data: %4d, Flush: %4d, Discard: %4d)\n",
+			   si->nr_wb_cp_data, si->nr_wb_data,
+			   si->nr_flush, si->nr_discard);
+		seq_printf(s, "  - inmem: %4d, atomic IO: %4d (Max. %4d)\n",
+			   si->inmem_pages, si->aw_cnt, si->max_aw_cnt);
 		seq_printf(s, "  - nodes: %4d in %4d\n",
 			   si->ndirty_node, si->node_pages);
 		seq_printf(s, "  - dents: %4d in dirs:%4d (%4d)\n",
@@ -414,6 +432,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
 	atomic_set(&sbi->inline_dir, 0);
 	atomic_set(&sbi->inplace_count, 0);

+	atomic_set(&sbi->aw_cnt, 0);
+	atomic_set(&sbi->max_aw_cnt, 0);
+
 	mutex_lock(&f2fs_stat_mutex);
 	list_add_tail(&si->stat_list, &f2fs_stat_list);
 	mutex_unlock(&f2fs_stat_mutex);

--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -207,9 +207,13 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
 		f2fs_put_page(dentry_page, 0);
 	}

-	if (!de && room && F2FS_I(dir)->chash != namehash) {
-		F2FS_I(dir)->chash = namehash;
-		F2FS_I(dir)->clevel = level;
+	/* This is to increase the speed of f2fs_create */
+	if (!de && room) {
+		F2FS_I(dir)->task = current;
+		if (F2FS_I(dir)->chash != namehash) {
+			F2FS_I(dir)->chash = namehash;
+			F2FS_I(dir)->clevel = level;
+		}
 	}

 	return de;
@@ -548,8 +552,10 @@ int f2fs_add_regular_entry(struct inode *dir, const struct qstr *new_name,

 start:
 #ifdef CONFIG_F2FS_FAULT_INJECTION
-	if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH))
+	if (time_to_inject(F2FS_I_SB(dir), FAULT_DIR_DEPTH)) {
+		f2fs_show_injection_info(FAULT_DIR_DEPTH);
 		return -ENOSPC;
+	}
 #endif
 	if (unlikely(current_depth == MAX_DIR_HASH_DEPTH))
 		return -ENOSPC;
@@ -646,14 +652,34 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name,
 				struct inode *inode, nid_t ino, umode_t mode)
 {
 	struct fscrypt_name fname;
+	struct page *page = NULL;
+	struct f2fs_dir_entry *de = NULL;
 	int err;

 	err = fscrypt_setup_filename(dir, name, 0, &fname);
 	if (err)
 		return err;

-	err = __f2fs_do_add_link(dir, &fname, inode, ino, mode);
-
+	/*
+	 * An immature stakable filesystem shows a race condition between lookup
+	 * and create. If we have same task when doing lookup and create, it's
+	 * definitely fine as expected by VFS normally. Otherwise, let's just
+	 * verify on-disk dentry one more time, which guarantees filesystem
+	 * consistency more.
+	 */
+	if (current != F2FS_I(dir)->task) {
+		de = __f2fs_find_entry(dir, &fname, &page);
+		F2FS_I(dir)->task = NULL;
+	}
+	if (de) {
+		f2fs_dentry_kunmap(dir, page);
+		f2fs_put_page(page, 0);
+		err = -EEXIST;
+	} else if (IS_ERR(page)) {
+		err = PTR_ERR(page);
+	} else {
+		err = __f2fs_do_add_link(dir, &fname, inode, ino, mode);
+	}
 	fscrypt_free_filename(&fname);
 	return err;
 }

--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -77,7 +77,7 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode)
 	struct extent_tree *et;
 	nid_t ino = inode->i_ino;

-	down_write(&sbi->extent_tree_lock);
+	mutex_lock(&sbi->extent_tree_lock);
 	et = radix_tree_lookup(&sbi->extent_tree_root, ino);
 	if (!et) {
 		et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS);
@@ -94,7 +94,7 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode)
 		atomic_dec(&sbi->total_zombie_tree);
 		list_del_init(&et->list);
 	}
-	up_write(&sbi->extent_tree_lock);
+	mutex_unlock(&sbi->extent_tree_lock);

 	/* never died until evict_inode */
 	F2FS_I(inode)->extent_tree = et;
@@ -311,28 +311,24 @@ static struct extent_node *__lookup_extent_tree_ret(struct extent_tree *et,
 	tmp_node = parent;
 	if (parent && fofs > en->ei.fofs)
 		tmp_node = rb_next(parent);
-	*next_ex = tmp_node ?
-		rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
+	*next_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node);

 	tmp_node = parent;
 	if (parent && fofs < en->ei.fofs)
 		tmp_node = rb_prev(parent);
-	*prev_ex = tmp_node ?
-		rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
+	*prev_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node);
 	return NULL;

 lookup_neighbors:
 	if (fofs == en->ei.fofs) {
 		/* lookup prev node for merging backward later */
 		tmp_node = rb_prev(&en->rb_node);
-		*prev_ex = tmp_node ?
-			rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
+		*prev_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node);
 	}
 	if (fofs == en->ei.fofs + en->ei.len - 1) {
 		/* lookup next node for merging frontward later */
 		tmp_node = rb_next(&en->rb_node);
-		*next_ex = tmp_node ?
-			rb_entry(tmp_node, struct extent_node, rb_node) : NULL;
+		*next_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node);
 	}
 	return en;
 }
@@ -352,11 +348,12 @@ static struct extent_node *__try_merge_extent_node(struct inode *inode,
 	}

 	if (next_ex && __is_front_mergeable(ei, &next_ex->ei)) {
-		if (en)
-			__release_extent_node(sbi, et, prev_ex);
 		next_ex->ei.fofs = ei->fofs;
 		next_ex->ei.blk = ei->blk;
 		next_ex->ei.len += ei->len;
+		if (en)
+			__release_extent_node(sbi, et, prev_ex);
+
 		en = next_ex;
 	}

@@ -416,7 +413,7 @@ static struct extent_node *__insert_extent_tree(struct inode *inode,
 	return en;
 }

-static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
+static void f2fs_update_extent_tree_range(struct inode *inode,
 				pgoff_t fofs, block_t blkaddr, unsigned int len)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
@@ -429,7 +426,7 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
 	unsigned int pos = (unsigned int)fofs;

 	if (!et)
-		return false;
+		return;

 	trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, len);

@@ -437,7 +434,7 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,

 	if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
 		write_unlock(&et->lock);
-		return false;
+		return;
 	}

 	prev = et->largest;
@@ -492,9 +489,8 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
 		if (!next_en) {
 			struct rb_node *node = rb_next(&en->rb_node);

-			next_en = node ?
-				rb_entry(node, struct extent_node, rb_node)
-				: NULL;
+			next_en = rb_entry_safe(node, struct extent_node,
+						rb_node);
 		}

 		if (parts)
@@ -535,8 +531,6 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
 		__free_extent_tree(sbi, et);

 	write_unlock(&et->lock);
-
-	return !__is_extent_same(&prev, &et->largest);
 }

 unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
@@ -552,7 +546,7 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
 	if (!atomic_read(&sbi->total_zombie_tree))
 		goto free_node;

-	if (!down_write_trylock(&sbi->extent_tree_lock))
+	if (!mutex_trylock(&sbi->extent_tree_lock))
 		goto out;

 	/* 1. remove unreferenced extent tree */
@@ -574,11 +568,11 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
 			goto unlock_out;
 		cond_resched();
 	}
-	up_write(&sbi->extent_tree_lock);
+	mutex_unlock(&sbi->extent_tree_lock);

 free_node:
 	/* 2. remove LRU extent entries */
-	if (!down_write_trylock(&sbi->extent_tree_lock))
+	if (!mutex_trylock(&sbi->extent_tree_lock))
 		goto out;

 	remained = nr_shrink - (node_cnt + tree_cnt);
@@ -608,7 +602,7 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
 	spin_unlock(&sbi->extent_lock);

 unlock_out:
-	up_write(&sbi->extent_tree_lock);
+	mutex_unlock(&sbi->extent_tree_lock);
 out:
 	trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt);

@@ -655,10 +649,10 @@ void f2fs_destroy_extent_tree(struct inode *inode)

 	if (inode->i_nlink && !is_bad_inode(inode) &&
 					atomic_read(&et->node_cnt)) {
-		down_write(&sbi->extent_tree_lock);
+		mutex_lock(&sbi->extent_tree_lock);
 		list_add_tail(&et->list, &sbi->zombie_list);
 		atomic_inc(&sbi->total_zombie_tree);
-		up_write(&sbi->extent_tree_lock);
+		mutex_unlock(&sbi->extent_tree_lock);
 		return;
 	}

@@ -666,12 +660,12 @@ void f2fs_destroy_extent_tree(struct inode *inode)
 	node_cnt = f2fs_destroy_extent_node(inode);

 	/* delete extent tree entry in radix tree */
-	down_write(&sbi->extent_tree_lock);
+	mutex_lock(&sbi->extent_tree_lock);
 	f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
 	radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
 	kmem_cache_free(extent_tree_slab, et);
 	atomic_dec(&sbi->total_ext_tree);
-	up_write(&sbi->extent_tree_lock);
+	mutex_unlock(&sbi->extent_tree_lock);

 	F2FS_I(inode)->extent_tree = NULL;

@@ -718,7 +712,7 @@ void f2fs_update_extent_cache_range(struct dnode_of_data *dn,
 void init_extent_cache_info(struct f2fs_sb_info *sbi)
 {
 	INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO);
-	init_rwsem(&sbi->extent_tree_lock);
+	mutex_init(&sbi->extent_tree_lock);
 	INIT_LIST_HEAD(&sbi->extent_list);
 	spin_lock_init(&sbi->extent_lock);
 	atomic_set(&sbi->total_ext_tree, 0);

--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -20,6 +20,7 @@
 #include <linux/uaccess.h>
 #include <linux/mount.h>
 #include <linux/pagevec.h>
+#include <linux/uio.h>
 #include <linux/uuid.h>
 #include <linux/file.h>

@@ -140,8 +141,6 @@ static inline bool need_do_checkpoint(struct inode *inode)
 		need_cp = true;
 	else if (!is_checkpointed_node(sbi, F2FS_I(inode)->i_pino))
 		need_cp = true;
-	else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi)))
-		need_cp = true;
 	else if (test_opt(sbi, FASTBOOT))
 		need_cp = true;
 	else if (sbi->active_logs == 2)
@@ -167,7 +166,6 @@ static void try_to_fix_pino(struct inode *inode)
 	nid_t pino;

 	down_write(&fi->i_sem);
-	fi->xattr_ver = 0;
 	if (file_wrong_pino(inode) && inode->i_nlink == 1 &&
 			get_parent_ino(inode, &pino)) {
 		f2fs_i_pino_write(inode, pino);
@@ -276,7 +274,8 @@ static int f2fs_do_sync_file(struct file *file, loff_t start, loff_t end,
 flush_out:
 	remove_ino_entry(sbi, ino, UPDATE_INO);
 	clear_inode_flag(inode, FI_UPDATE_WRITE);
-	ret = f2fs_issue_flush(sbi);
+	if (!atomic)
+		ret = f2fs_issue_flush(sbi);
 	f2fs_update_time(sbi, REQ_TIME);
 out:
 	trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret);
@@ -567,8 +566,9 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
 	}

 	if (f2fs_has_inline_data(inode)) {
-		if (truncate_inline_inode(ipage, from))
-			set_page_dirty(ipage);
+		truncate_inline_inode(ipage, from);
+		if (from == 0)
+			clear_inode_flag(inode, FI_DATA_EXIST);
 		f2fs_put_page(ipage, 1);
 		truncate_page = true;
 		goto out;
@@ -1541,6 +1541,8 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
 	if (ret)
 		clear_inode_flag(inode, FI_ATOMIC_FILE);
 out:
+	stat_inc_atomic_write(inode);
+	stat_update_max_atomic_write(inode);
 	inode_unlock(inode);
 	mnt_drop_write_file(filp);
 	return ret;
@@ -1564,15 +1566,18 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
 		goto err_out;

 	if (f2fs_is_atomic_file(inode)) {
-		clear_inode_flag(inode, FI_ATOMIC_FILE);
 		ret = commit_inmem_pages(inode);
-		if (ret) {
-			set_inode_flag(inode, FI_ATOMIC_FILE);
+		if (ret)
 			goto err_out;
+
+		ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
+		if (!ret) {
+			clear_inode_flag(inode, FI_ATOMIC_FILE);
+			stat_dec_atomic_write(inode);
 		}
+	} else {
+		ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
 	}
-
-	ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
 err_out:
 	inode_unlock(inode);
 	mnt_drop_write_file(filp);
@@ -1870,7 +1875,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
 {
 	struct inode *inode = file_inode(filp);
 	struct f2fs_map_blocks map = { .m_next_pgofs = NULL };
-	struct extent_info ei;
+	struct extent_info ei = {0,0,0};
 	pgoff_t pg_start, pg_end;
 	unsigned int blk_per_seg = sbi->blocks_per_seg;
 	unsigned int total = 0, sec_num;
@@ -2250,8 +2255,12 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	inode_lock(inode);
 	ret = generic_write_checks(iocb, from);
 	if (ret > 0) {
-		int err = f2fs_preallocate_blocks(iocb, from);
+		int err;
+
+		if (iov_iter_fault_in_readable(from, iov_iter_count(from)))
+			set_inode_flag(inode, FI_NO_PREALLOC);

+		err = f2fs_preallocate_blocks(iocb, from);
 		if (err) {
 			inode_unlock(inode);
 			return err;
@@ -2259,6 +2268,7 @@ static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 		blk_start_plug(&plug);
 		ret = __generic_file_write_iter(iocb, from);
 		blk_finish_plug(&plug);
+		clear_inode_flag(inode, FI_NO_PREALLOC);
 	}
 	inode_unlock(inode);


--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -48,8 +48,10 @@ static int gc_thread_func(void *data)
 		}

 #ifdef CONFIG_F2FS_FAULT_INJECTION
-		if (time_to_inject(sbi, FAULT_CHECKPOINT))
+		if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
+			f2fs_show_injection_info(FAULT_CHECKPOINT);
 			f2fs_stop_checkpoint(sbi, false);
+		}
 #endif

 		/*
@@ -166,7 +168,8 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
 		p->ofs_unit = sbi->segs_per_sec;
 	}

-	if (p->max_search > sbi->max_victim_search)
+	/* we need to check every dirty segments in the FG_GC case */
+	if (gc_type != FG_GC && p->max_search > sbi->max_victim_search)
 		p->max_search = sbi->max_victim_search;

 	p->offset = sbi->last_victim[p->gc_mode];
@@ -199,6 +202,10 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
 	for_each_set_bit(secno, dirty_i->victim_secmap, MAIN_SECS(sbi)) {
 		if (sec_usage_check(sbi, secno))
 			continue;
+
+		if (no_fggc_candidate(sbi, secno))
+			continue;
+
 		clear_bit(secno, dirty_i->victim_secmap);
 		return secno * sbi->segs_per_sec;
 	}
@@ -237,6 +244,16 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
 	return UINT_MAX - ((100 * (100 - u) * age) / (100 + u));
 }

+static unsigned int get_greedy_cost(struct f2fs_sb_info *sbi,
+						unsigned int segno)
+{
+	unsigned int valid_blocks =
+			get_valid_blocks(sbi, segno, sbi->segs_per_sec);
+
+	return IS_DATASEG(get_seg_entry(sbi, segno)->type) ?
+				valid_blocks * 2 : valid_blocks;
+}
+
 static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
 			unsigned int segno, struct victim_sel_policy *p)
 {
@@ -245,7 +262,7 @@ static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,

 	/* alloc_mode == LFS */
 	if (p->gc_mode == GC_GREEDY)
-		return get_valid_blocks(sbi, segno, sbi->segs_per_sec);
+		return get_greedy_cost(sbi, segno);
 	else
 		return get_cb_cost(sbi, segno);
 }
@@ -322,13 +339,15 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
 			nsearched++;
 		}

-
 		secno = GET_SECNO(sbi, segno);

 		if (sec_usage_check(sbi, secno))
 			goto next;
 		if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
 			goto next;
+		if (gc_type == FG_GC && p.alloc_mode == LFS &&
+					no_fggc_candidate(sbi, secno))
+			goto next;

 		cost = get_gc_cost(sbi, segno, &p);

@@ -569,6 +588,9 @@ static void move_encrypted_block(struct inode *inode, block_t bidx,
 	if (!check_valid_map(F2FS_I_SB(inode), segno, off))
 		goto out;

+	if (f2fs_is_atomic_file(inode))
+		goto out;
+
 	set_new_dnode(&dn, inode, NULL, NULL, 0);
 	err = get_dnode_of_data(&dn, bidx, LOOKUP_NODE);
 	if (err)
@@ -661,6 +683,9 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type,
 	if (!check_valid_map(F2FS_I_SB(inode), segno, off))
 		goto out;

+	if (f2fs_is_atomic_file(inode))
+		goto out;
+
 	if (gc_type == BG_GC) {
 		if (PageWriteback(page))
 			goto out;
@@ -921,8 +946,6 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background)

 	cpc.reason = __get_cp_reason(sbi);
 gc_more:
-	segno = NULL_SEGNO;
-
 	if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)))
 		goto stop;
 	if (unlikely(f2fs_cp_error(sbi))) {
@@ -930,30 +953,23 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background)
 		goto stop;
 	}

-	if (gc_type == BG_GC && has_not_enough_free_secs(sbi, sec_freed, 0)) {
-		gc_type = FG_GC;
+	if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) {
 		/*
-		 * If there is no victim and no prefree segment but still not
-		 * enough free sections, we should flush dent/node blocks and do
-		 * garbage collections.
+		 * For example, if there are many prefree_segments below given
+		 * threshold, we can make them free by checkpoint. Then, we
+		 * secure free segments which doesn't need fggc any more.
 		 */
-		if (__get_victim(sbi, &segno, gc_type) ||
-						prefree_segments(sbi)) {
-			ret = write_checkpoint(sbi, &cpc);
-			if (ret)
-				goto stop;
-			segno = NULL_SEGNO;
-		} else if (has_not_enough_free_secs(sbi, 0, 0)) {
-			ret = write_checkpoint(sbi, &cpc);
-			if (ret)
-				goto stop;
-		}
-	} else if (gc_type == BG_GC && !background) {
-		/* f2fs_balance_fs doesn't need to do BG_GC in critical path. */
-		goto stop;
+		ret = write_checkpoint(sbi, &cpc);
+		if (ret)
+			goto stop;
+		if (has_not_enough_free_secs(sbi, 0, 0))
+			gc_type = FG_GC;
 	}

-	if (segno == NULL_SEGNO && !__get_victim(sbi, &segno, gc_type))
+	/* f2fs_balance_fs doesn't need to do BG_GC in critical path. */
+	if (gc_type == BG_GC && !background)
+		goto stop;
+	if (!__get_victim(sbi, &segno, gc_type))
 		goto stop;
 	ret = 0;

@@ -983,5 +999,16 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background)

 void build_gc_manager(struct f2fs_sb_info *sbi)
 {
+	u64 main_count, resv_count, ovp_count, blocks_per_sec;
+
 	DIRTY_I(sbi)->v_ops = &default_v_ops;
+
+	/* threshold of # of valid blocks in a section for victims of FG_GC */
+	main_count = SM_I(sbi)->main_segments << sbi->log_blocks_per_seg;
+	resv_count = SM_I(sbi)->reserved_segments << sbi->log_blocks_per_seg;
+	ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg;
+	blocks_per_sec = sbi->blocks_per_seg * sbi->segs_per_sec;
+
+	sbi->fggc_threshold = div64_u64((main_count - ovp_count) * blocks_per_sec,
+					(main_count - resv_count));
 }
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -373,8 +373,10 @@ void f2fs_evict_inode(struct inode *inode)
 		goto no_delete;

 #ifdef CONFIG_F2FS_FAULT_INJECTION
-	if (time_to_inject(sbi, FAULT_EVICT_INODE))
+	if (time_to_inject(sbi, FAULT_EVICT_INODE)) {
+		f2fs_show_injection_info(FAULT_EVICT_INODE);
 		goto no_delete;
+	}
 #endif

 	remove_ino_entry(sbi, inode->i_ino, APPEND_INO);

--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -321,9 +321,9 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
 		if (err)
 			goto err_out;
 	}
-	if (!IS_ERR(inode) && f2fs_encrypted_inode(dir) &&
-			(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
-			!fscrypt_has_permitted_context(dir, inode)) {
+	if (f2fs_encrypted_inode(dir) &&
+	    (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
+	    !fscrypt_has_permitted_context(dir, inode)) {
 		bool nokey = f2fs_encrypted_inode(inode) &&
 			!fscrypt_has_encryption_key(inode);
 		err = nokey ? -ENOKEY : -EPERM;
@@ -663,6 +663,12 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	bool is_old_inline = f2fs_has_inline_dentry(old_dir);
 	int err = -ENOENT;

+	if ((f2fs_encrypted_inode(old_dir) &&
+			!fscrypt_has_encryption_key(old_dir)) ||
+			(f2fs_encrypted_inode(new_dir) &&
+			!fscrypt_has_encryption_key(new_dir)))
+		return -ENOKEY;
+
 	if ((old_dir != new_dir) && f2fs_encrypted_inode(new_dir) &&
 			!fscrypt_has_permitted_context(new_dir, old_inode)) {
 		err = -EPERM;
@@ -843,6 +849,12 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
 	int old_nlink = 0, new_nlink = 0;
 	int err = -ENOENT;

+	if ((f2fs_encrypted_inode(old_dir) &&
+			!fscrypt_has_encryption_key(old_dir)) ||
+			(f2fs_encrypted_inode(new_dir) &&
+			!fscrypt_has_encryption_key(new_dir)))
+		return -ENOKEY;
+
 	if ((f2fs_encrypted_inode(old_dir) || f2fs_encrypted_inode(new_dir)) &&
 			(old_dir != new_dir) &&
 			(!fscrypt_has_permitted_context(new_dir, old_inode) ||

--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -174,7 +174,7 @@ static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid)
 		spin_unlock(&nm_i->nid_list_lock);
 		return;
 	}
-	fnid = list_entry(nm_i->nid_list[FREE_NID_LIST].next,
+	fnid = list_first_entry(&nm_i->nid_list[FREE_NID_LIST],
 						struct free_nid, list);
 	*nid = fnid->nid;
 	spin_unlock(&nm_i->nid_list_lock);
@@ -186,6 +186,12 @@ static inline void next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid)
 static inline void get_nat_bitmap(struct f2fs_sb_info *sbi, void *addr)
 {
 	struct f2fs_nm_info *nm_i = NM_I(sbi);
+
+#ifdef CONFIG_F2FS_CHECK_FS
+	if (memcmp(nm_i->nat_bitmap, nm_i->nat_bitmap_mir,
+						nm_i->bitmap_size))
+		f2fs_bug_on(sbi, 1);
+#endif
 	memcpy(addr, nm_i->nat_bitmap, nm_i->bitmap_size);
 }

@@ -228,6 +234,9 @@ static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid)
 	unsigned int block_off = NAT_BLOCK_OFFSET(start_nid);

 	f2fs_change_bit(block_off, nm_i->nat_bitmap);
+#ifdef CONFIG_F2FS_CHECK_FS
+	f2fs_change_bit(block_off, nm_i->nat_bitmap_mir);
+#endif
 }

 static inline nid_t ino_of_node(struct page *node_page)
@@ -291,14 +300,11 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr)
 {
 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page));
 	struct f2fs_node *rn = F2FS_NODE(page);
-	size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
-	__u64 cp_ver = le64_to_cpu(ckpt->checkpoint_ver);
+	__u64 cp_ver = cur_cp_version(ckpt);
+
+	if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG))
+		cp_ver |= (cur_cp_crc(ckpt) << 32);

-	if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) {
-		__u64 crc = le32_to_cpu(*((__le32 *)
-				((unsigned char *)ckpt + crc_offset)));
-		cp_ver |= (crc << 32);
-	}
 	rn->footer.cp_ver = cpu_to_le64(cp_ver);
 	rn->footer.next_blkaddr = cpu_to_le32(blkaddr);
 }
@@ -306,14 +312,11 @@ static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr)
 static inline bool is_recoverable_dnode(struct page *page)
 {
 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(F2FS_P_SB(page));
-	size_t crc_offset = le32_to_cpu(ckpt->checksum_offset);
 	__u64 cp_ver = cur_cp_version(ckpt);

-	if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG)) {
-		__u64 crc = le32_to_cpu(*((__le32 *)
-				((unsigned char *)ckpt + crc_offset)));
-		cp_ver |= (crc << 32);
-	}
+	if (__is_set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG))
+		cp_ver |= (cur_cp_crc(ckpt) << 32);
+
 	return cp_ver == cpver_of_node(page);
 }

@@ -343,7 +346,7 @@ static inline bool IS_DNODE(struct page *node_page)
 	unsigned int ofs = ofs_of_node(node_page);

 	if (f2fs_has_xattr_block(ofs))
-		return false;
+		return true;

 	if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK ||
 			ofs == 5 + 2 * NIDS_PER_BLOCK)

--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -378,11 +378,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
 	if (IS_INODE(page)) {
 		recover_inline_xattr(inode, page);
 	} else if (f2fs_has_xattr_block(ofs_of_node(page))) {
-		/*
-		 * Deprecated; xattr blocks should be found from cold log.
-		 * But, we should remain this for backward compatibility.
-		 */
-		recover_xattr_data(inode, page, blkaddr);
+		err = recover_xattr_data(inode, page, blkaddr);
+		if (!err)
+			recovered++;
 		goto out;
 	}

@@ -428,8 +426,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
 		}

 		if (!file_keep_isize(inode) &&
-				(i_size_read(inode) <= (start << PAGE_SHIFT)))
-			f2fs_i_size_write(inode, (start + 1) << PAGE_SHIFT);
+			(i_size_read(inode) <= ((loff_t)start << PAGE_SHIFT)))
+			f2fs_i_size_write(inode,
+				(loff_t)(start + 1) << PAGE_SHIFT);

 		/*
 		 * dest is reserved block, invalidate src block
@@ -552,10 +551,8 @@ static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,

 int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
 {
-	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
 	struct list_head inode_list;
 	struct list_head dir_list;
-	block_t blkaddr;
 	int err;
 	int ret = 0;
 	bool need_writecp = false;
@@ -571,8 +568,6 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
 	/* prevent checkpoint */
 	mutex_lock(&sbi->cp_mutex);

-	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
-
 	/* step #1: find fsynced inode numbers */
 	err = find_fsync_dnodes(sbi, &inode_list);
 	if (err || list_empty(&inode_list))

--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -164,6 +164,9 @@ struct seg_entry {
 	unsigned int ckpt_valid_blocks:10;	/* # of valid blocks last cp */
 	unsigned int padding:6;		/* padding */
 	unsigned char *cur_valid_map;	/* validity bitmap of blocks */
+#ifdef CONFIG_F2FS_CHECK_FS
+	unsigned char *cur_valid_map_mir;	/* mirror of current valid bitmap */
+#endif
 	/*
 	 * # of valid blocks and the validity bitmap stored in the the last
 	 * checkpoint pack. This information is used by the SSR mode.
@@ -186,9 +189,12 @@ struct segment_allocation {
 * the page is atomically written, and it is in inmem_pages list.
 */
 #define ATOMIC_WRITTEN_PAGE		((unsigned long)-1)
+#define DUMMY_WRITTEN_PAGE		((unsigned long)-2)

 #define IS_ATOMIC_WRITTEN_PAGE(page)			\
 		(page_private(page) == (unsigned long)ATOMIC_WRITTEN_PAGE)
+#define IS_DUMMY_WRITTEN_PAGE(page)			\
+		(page_private(page) == (unsigned long)DUMMY_WRITTEN_PAGE)

 struct inmem_pages {
 	struct list_head list;
@@ -203,6 +209,9 @@ struct sit_info {
 	block_t sit_blocks;		/* # of blocks used by SIT area */
 	block_t written_valid_blocks;	/* # of valid blocks in main area */
 	char *sit_bitmap;		/* SIT bitmap pointer */
+#ifdef CONFIG_F2FS_CHECK_FS
+	char *sit_bitmap_mir;		/* SIT bitmap mirror */
+#endif
 	unsigned int bitmap_size;	/* SIT bitmap size */

 	unsigned long *tmp_map;			/* bitmap for temporal use */
@@ -317,6 +326,9 @@ static inline void seg_info_from_raw_sit(struct seg_entry *se,
 	se->ckpt_valid_blocks = GET_SIT_VBLOCKS(rs);
 	memcpy(se->cur_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE);
 	memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE);
+#ifdef CONFIG_F2FS_CHECK_FS
+	memcpy(se->cur_valid_map_mir, rs->valid_map, SIT_VBLOCK_MAP_SIZE);
+#endif
 	se->type = GET_SIT_TYPE(rs);
 	se->mtime = le64_to_cpu(rs->mtime);
 }
@@ -414,6 +426,12 @@ static inline void get_sit_bitmap(struct f2fs_sb_info *sbi,
 		void *dst_addr)
 {
 	struct sit_info *sit_i = SIT_I(sbi);
+
+#ifdef CONFIG_F2FS_CHECK_FS
+	if (memcmp(sit_i->sit_bitmap, sit_i->sit_bitmap_mir,
+						sit_i->bitmap_size))
+		f2fs_bug_on(sbi, 1);
+#endif
 	memcpy(dst_addr, sit_i->sit_bitmap, sit_i->bitmap_size);
 }

@@ -634,6 +652,12 @@ static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi,

 	check_seg_range(sbi, start);

+#ifdef CONFIG_F2FS_CHECK_FS
+	if (f2fs_test_bit(offset, sit_i->sit_bitmap) !=
+			f2fs_test_bit(offset, sit_i->sit_bitmap_mir))
+		f2fs_bug_on(sbi, 1);
+#endif
+
 	/* calculate sit block address */
 	if (f2fs_test_bit(offset, sit_i->sit_bitmap))
 		blk_addr += sit_i->sit_blocks;
@@ -659,6 +683,9 @@ static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start)
 	unsigned int block_off = SIT_BLOCK_OFFSET(start);

 	f2fs_change_bit(block_off, sit_i->sit_bitmap);
+#ifdef CONFIG_F2FS_CHECK_FS
+	f2fs_change_bit(block_off, sit_i->sit_bitmap_mir);
+#endif
 }

 static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi)
@@ -689,6 +716,15 @@ static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type)
 				- (base + 1) + type;
 }

+static inline bool no_fggc_candidate(struct f2fs_sb_info *sbi,
+						unsigned int secno)
+{
+	if (get_valid_blocks(sbi, secno, sbi->segs_per_sec) >=
+						sbi->fggc_threshold)
+		return true;
+	return false;
+}
+
 static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno)
 {
 	if (IS_CURSEC(sbi, secno) || (sbi->cur_victim_sec == secno))
@@ -700,8 +736,8 @@ static inline bool sec_usage_check(struct f2fs_sb_info *sbi, unsigned int secno)
 * It is very important to gather dirty pages and write at once, so that we can
 * submit a big bio without interfering other data writes.
 * By default, 512 pages for directory data,
- * 512 pages (2MB) * 3 for three types of nodes, and
- * max_bio_blocks for meta are set.
+ * 512 pages (2MB) * 8 for nodes, and
+ * 256 pages * 8 for meta are set.
 */
 static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type)
 {

--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -89,6 +89,7 @@ enum {
 	Opt_active_logs,
 	Opt_disable_ext_identify,
 	Opt_inline_xattr,
+	Opt_noinline_xattr,
 	Opt_inline_data,
 	Opt_inline_dentry,
 	Opt_noinline_dentry,
@@ -101,6 +102,7 @@ enum {
 	Opt_noinline_data,
 	Opt_data_flush,
 	Opt_mode,
+	Opt_io_size_bits,
 	Opt_fault_injection,
 	Opt_lazytime,
 	Opt_nolazytime,
@@ -121,6 +123,7 @@ static match_table_t f2fs_tokens = {
 	{Opt_active_logs, "active_logs=%u"},
 	{Opt_disable_ext_identify, "disable_ext_identify"},
 	{Opt_inline_xattr, "inline_xattr"},
+	{Opt_noinline_xattr, "noinline_xattr"},
 	{Opt_inline_data, "inline_data"},
 	{Opt_inline_dentry, "inline_dentry"},
 	{Opt_noinline_dentry, "noinline_dentry"},
@@ -133,6 +136,7 @@ static match_table_t f2fs_tokens = {
 	{Opt_noinline_data, "noinline_data"},
 	{Opt_data_flush, "data_flush"},
 	{Opt_mode, "mode=%s"},
+	{Opt_io_size_bits, "io_bits=%u"},
 	{Opt_fault_injection, "fault_injection=%u"},
 	{Opt_lazytime, "lazytime"},
 	{Opt_nolazytime, "nolazytime"},
@@ -143,6 +147,7 @@ static match_table_t f2fs_tokens = {
 enum {
 	GC_THREAD,	/* struct f2fs_gc_thread */
 	SM_INFO,	/* struct f2fs_sm_info */
+	DCC_INFO,	/* struct discard_cmd_control */
 	NM_INFO,	/* struct f2fs_nm_info */
 	F2FS_SBI,	/* struct f2fs_sb_info */
 #ifdef CONFIG_F2FS_FAULT_INJECTION
@@ -166,6 +171,8 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
 		return (unsigned char *)sbi->gc_thread;
 	else if (struct_type == SM_INFO)
 		return (unsigned char *)SM_I(sbi);
+	else if (struct_type == DCC_INFO)
+		return (unsigned char *)SM_I(sbi)->dcc_info;
 	else if (struct_type == NM_INFO)
 		return (unsigned char *)NM_I(sbi);
 	else if (struct_type == F2FS_SBI)
@@ -281,7 +288,7 @@ F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time);
 F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
 F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_idle, gc_idle);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments);
-F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards);
+F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_small_discards, max_discards);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
@@ -439,6 +446,9 @@ static int parse_options(struct super_block *sb, char *options)
 		case Opt_inline_xattr:
 			set_opt(sbi, INLINE_XATTR);
 			break;
+		case Opt_noinline_xattr:
+			clear_opt(sbi, INLINE_XATTR);
+			break;
 #else
 		case Opt_user_xattr:
 			f2fs_msg(sb, KERN_INFO,
@@ -452,6 +462,10 @@ static int parse_options(struct super_block *sb, char *options)
 			f2fs_msg(sb, KERN_INFO,
 				"inline_xattr options not supported");
 			break;
+		case Opt_noinline_xattr:
+			f2fs_msg(sb, KERN_INFO,
+				"noinline_xattr options not supported");
+			break;
 #endif
 #ifdef CONFIG_F2FS_FS_POSIX_ACL
 		case Opt_acl:
@@ -535,11 +549,23 @@ static int parse_options(struct super_block *sb, char *options)
 			}
 			kfree(name);
 			break;
+		case Opt_io_size_bits:
+			if (args->from && match_int(args, &arg))
+				return -EINVAL;
+			if (arg > __ilog2_u32(BIO_MAX_PAGES)) {
+				f2fs_msg(sb, KERN_WARNING,
+					"Not support %d, larger than %d",
+					1 << arg, BIO_MAX_PAGES);
+				return -EINVAL;
+			}
+			sbi->write_io_size_bits = arg;
+			break;
 		case Opt_fault_injection:
 			if (args->from && match_int(args, &arg))
 				return -EINVAL;
 #ifdef CONFIG_F2FS_FAULT_INJECTION
 			f2fs_build_fault_attr(sbi, arg);
+			set_opt(sbi, FAULT_INJECTION);
 #else
 			f2fs_msg(sb, KERN_INFO,
 				"FAULT_INJECTION was not selected");
@@ -558,6 +584,13 @@ static int parse_options(struct super_block *sb, char *options)
 			return -EINVAL;
 		}
 	}
+
+	if (F2FS_IO_SIZE_BITS(sbi) && !test_opt(sbi, LFS)) {
+		f2fs_msg(sb, KERN_ERR,
+				"Should set mode=lfs with %uKB-sized IO",
+				F2FS_IO_SIZE_KB(sbi));
+		return -EINVAL;
+	}
 	return 0;
 }

@@ -591,6 +624,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)

 static int f2fs_drop_inode(struct inode *inode)
 {
+	int ret;
 	/*
 	 * This is to avoid a deadlock condition like below.
 	 * writeback_single_inode(inode)
@@ -623,10 +657,12 @@ static int f2fs_drop_inode(struct inode *inode)
 			spin_lock(&inode->i_lock);
 			atomic_dec(&inode->i_count);
 		}
+		trace_f2fs_drop_inode(inode, 0);
 		return 0;
 	}
-
-	return generic_drop_inode(inode);
+	ret = generic_drop_inode(inode);
+	trace_f2fs_drop_inode(inode, ret);
+	return ret;
 }

 int f2fs_inode_dirtied(struct inode *inode, bool sync)
@@ -750,6 +786,9 @@ static void f2fs_put_super(struct super_block *sb)
 		write_checkpoint(sbi, &cpc);
 	}

+	/* be sure to wait for any on-going discard commands */
+	f2fs_wait_discard_bio(sbi, NULL_ADDR);
+
 	/* write_checkpoint can update stat informaion */
 	f2fs_destroy_stats(sbi);

@@ -782,7 +821,7 @@ static void f2fs_put_super(struct super_block *sb)
 	kfree(sbi->raw_super);

 	destroy_device_list(sbi);
-
+	mempool_destroy(sbi->write_io_dummy);
 	destroy_percpu_info(sbi);
 	kfree(sbi);
 }
@@ -882,6 +921,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
 		seq_puts(seq, ",nouser_xattr");
 	if (test_opt(sbi, INLINE_XATTR))
 		seq_puts(seq, ",inline_xattr");
+	else
+		seq_puts(seq, ",noinline_xattr");
 #endif
 #ifdef CONFIG_F2FS_FS_POSIX_ACL
 	if (test_opt(sbi, POSIX_ACL))
@@ -918,6 +959,12 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
 	else if (test_opt(sbi, LFS))
 		seq_puts(seq, "lfs");
 	seq_printf(seq, ",active_logs=%u", sbi->active_logs);
+	if (F2FS_IO_SIZE_BITS(sbi))
+		seq_printf(seq, ",io_size=%uKB", F2FS_IO_SIZE_KB(sbi));
+#ifdef CONFIG_F2FS_FAULT_INJECTION
+	if (test_opt(sbi, FAULT_INJECTION))
+		seq_puts(seq, ",fault_injection");
+#endif

 	return 0;
 }
@@ -995,6 +1042,7 @@ static void default_options(struct f2fs_sb_info *sbi)
 	sbi->active_logs = NR_CURSEG_TYPE;

 	set_opt(sbi, BG_GC);
+	set_opt(sbi, INLINE_XATTR);
 	set_opt(sbi, INLINE_DATA);
 	set_opt(sbi, INLINE_DENTRY);
 	set_opt(sbi, EXTENT_CACHE);
@@ -1686,36 +1734,55 @@ int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover)
 static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
 {
 	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
+	unsigned int max_devices = MAX_DEVICES;
 	int i;

-	for (i = 0; i < MAX_DEVICES; i++) {
-		if (!RDEV(i).path[0])
+	/* Initialize single device information */
+	if (!RDEV(0).path[0]) {
+		if (!bdev_is_zoned(sbi->sb->s_bdev))
 			return 0;
+		max_devices = 1;
+	}

-		if (i == 0) {
-			sbi->devs = kzalloc(sizeof(struct f2fs_dev_info) *
-						MAX_DEVICES, GFP_KERNEL);
-			if (!sbi->devs)
-				return -ENOMEM;
-		}
+	/*
+	 * Initialize multiple devices information, or single
+	 * zoned block device information.
+	 */
+	sbi->devs = kcalloc(max_devices, sizeof(struct f2fs_dev_info),
+				GFP_KERNEL);
+	if (!sbi->devs)
+		return -ENOMEM;

-		memcpy(FDEV(i).path, RDEV(i).path, MAX_PATH_LEN);
-		FDEV(i).total_segments = le32_to_cpu(RDEV(i).total_segments);
-		if (i == 0) {
-			FDEV(i).start_blk = 0;
-			FDEV(i).end_blk = FDEV(i).start_blk +
-				(FDEV(i).total_segments <<
-				sbi->log_blocks_per_seg) - 1 +
-				le32_to_cpu(raw_super->segment0_blkaddr);
-		} else {
-			FDEV(i).start_blk = FDEV(i - 1).end_blk + 1;
-			FDEV(i).end_blk = FDEV(i).start_blk +
-				(FDEV(i).total_segments <<
-				sbi->log_blocks_per_seg) - 1;
-		}
+	for (i = 0; i < max_devices; i++) {

-		FDEV(i).bdev = blkdev_get_by_path(FDEV(i).path,
+		if (i > 0 && !RDEV(i).path[0])
+			break;
+
+		if (max_devices == 1) {
+			/* Single zoned block device mount */
+			FDEV(0).bdev =
+				blkdev_get_by_dev(sbi->sb->s_bdev->bd_dev,
+					sbi->sb->s_mode, sbi->sb->s_type);
+		} else {
+			/* Multi-device mount */
+			memcpy(FDEV(i).path, RDEV(i).path, MAX_PATH_LEN);
+			FDEV(i).total_segments =
+				le32_to_cpu(RDEV(i).total_segments);
+			if (i == 0) {
+				FDEV(i).start_blk = 0;
+				FDEV(i).end_blk = FDEV(i).start_blk +
+				    (FDEV(i).total_segments <<
+				    sbi->log_blocks_per_seg) - 1 +
+				    le32_to_cpu(raw_super->segment0_blkaddr);
+			} else {
+				FDEV(i).start_blk = FDEV(i - 1).end_blk + 1;
+				FDEV(i).end_blk = FDEV(i).start_blk +
+					(FDEV(i).total_segments <<
+					sbi->log_blocks_per_seg) - 1;
+			}
+			FDEV(i).bdev = blkdev_get_by_path(FDEV(i).path,
 					sbi->sb->s_mode, sbi->sb->s_type);
+		}
 		if (IS_ERR(FDEV(i).bdev))
 			return PTR_ERR(FDEV(i).bdev);

@@ -1735,6 +1802,8 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
 					"Failed to initialize F2FS blkzone information");
 				return -EINVAL;
 			}
+			if (max_devices == 1)
+				break;
 			f2fs_msg(sbi->sb, KERN_INFO,
 				"Mount Device [%2d]: %20s, %8u, %8x - %8x (zone: %s)",
 				i, FDEV(i).path,
@@ -1751,6 +1820,8 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
 				FDEV(i).total_segments,
 				FDEV(i).start_blk, FDEV(i).end_blk);
 	}
+	f2fs_msg(sbi->sb, KERN_INFO,
+			"IO Block Size: %8d KB", F2FS_IO_SIZE_KB(sbi));
 	return 0;
 }

@@ -1868,12 +1939,19 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 	if (err)
 		goto free_options;

+	if (F2FS_IO_SIZE(sbi) > 1) {
+		sbi->write_io_dummy =
+			mempool_create_page_pool(2 * (F2FS_IO_SIZE(sbi) - 1), 0);
+		if (!sbi->write_io_dummy)
+			goto free_options;
+	}
+
 	/* get an inode for meta space */
 	sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi));
 	if (IS_ERR(sbi->meta_inode)) {
 		f2fs_msg(sb, KERN_ERR, "Failed to read F2FS meta data inode");
 		err = PTR_ERR(sbi->meta_inode);
-		goto free_options;
+		goto free_io_dummy;
 	}

 	err = get_valid_checkpoint(sbi);
@@ -2048,6 +2126,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 			sbi->valid_super_block ? 1 : 2, err);
 	}

+	f2fs_msg(sbi->sb, KERN_NOTICE, "Mounted with checkpoint version = %llx",
+				cur_cp_version(F2FS_CKPT(sbi)));
 	f2fs_update_time(sbi, CP_TIME);
 	f2fs_update_time(sbi, REQ_TIME);
 	return 0;
@@ -2091,6 +2171,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
 free_meta_inode:
 	make_bad_inode(sbi->meta_inode);
 	iput(sbi->meta_inode);
+free_io_dummy:
+	mempool_destroy(sbi->write_io_dummy);
 free_options:
 	destroy_percpu_info(sbi);
 	kfree(options);

--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -217,6 +217,112 @@ static struct f2fs_xattr_entry *__find_xattr(void *base_addr, int index,
 	return entry;
 }

+static struct f2fs_xattr_entry *__find_inline_xattr(void *base_addr,
+					void **last_addr, int index,
+					size_t len, const char *name)
+{
+	struct f2fs_xattr_entry *entry;
+	unsigned int inline_size = F2FS_INLINE_XATTR_ADDRS << 2;
+
+	list_for_each_xattr(entry, base_addr) {
+		if ((void *)entry + sizeof(__u32) > base_addr + inline_size ||
+			(void *)XATTR_NEXT_ENTRY(entry) + sizeof(__u32) >
+			base_addr + inline_size) {
+			*last_addr = entry;
+			return NULL;
+		}
+		if (entry->e_name_index != index)
+			continue;
+		if (entry->e_name_len != len)
+			continue;
+		if (!memcmp(entry->e_name, name, len))
+			break;
+	}
+	return entry;
+}
+
+static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
+				unsigned int index, unsigned int len,
+				const char *name, struct f2fs_xattr_entry **xe,
+				void **base_addr)
+{
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	void *cur_addr, *txattr_addr, *last_addr = NULL;
+	nid_t xnid = F2FS_I(inode)->i_xattr_nid;
+	unsigned int size = xnid ? VALID_XATTR_BLOCK_SIZE : 0;
+	unsigned int inline_size = 0;
+	int err = 0;
+
+	inline_size = inline_xattr_size(inode);
+
+	if (!size && !inline_size)
+		return -ENODATA;
+
+	txattr_addr = kzalloc(inline_size + size + sizeof(__u32),
+							GFP_F2FS_ZERO);
+	if (!txattr_addr)
+		return -ENOMEM;
+
+	/* read from inline xattr */
+	if (inline_size) {
+		struct page *page = NULL;
+		void *inline_addr;
+
+		if (ipage) {
+			inline_addr = inline_xattr_addr(ipage);
+		} else {
+			page = get_node_page(sbi, inode->i_ino);
+			if (IS_ERR(page)) {
+				err = PTR_ERR(page);
+				goto out;
+			}
+			inline_addr = inline_xattr_addr(page);
+		}
+		memcpy(txattr_addr, inline_addr, inline_size);
+		f2fs_put_page(page, 1);
+
+		*xe = __find_inline_xattr(txattr_addr, &last_addr,
+						index, len, name);
+		if (*xe)
+			goto check;
+	}
+
+	/* read from xattr node block */
+	if (xnid) {
+		struct page *xpage;
+		void *xattr_addr;
+
+		/* The inode already has an extended attribute block. */
+		xpage = get_node_page(sbi, xnid);
+		if (IS_ERR(xpage)) {
+			err = PTR_ERR(xpage);
+			goto out;
+		}
+
+		xattr_addr = page_address(xpage);
+		memcpy(txattr_addr + inline_size, xattr_addr, size);
+		f2fs_put_page(xpage, 1);
+	}
+
+	if (last_addr)
+		cur_addr = XATTR_HDR(last_addr) - 1;
+	else
+		cur_addr = txattr_addr;
+
+	*xe = __find_xattr(cur_addr, index, len, name);
+check:
+	if (IS_XATTR_LAST_ENTRY(*xe)) {
+		err = -ENODATA;
+		goto out;
+	}
+
+	*base_addr = txattr_addr;
+	return 0;
+out:
+	kzfree(txattr_addr);
+	return err;
+}
+
 static int read_all_xattrs(struct inode *inode, struct page *ipage,
 							void **base_addr)
 {
@@ -348,23 +454,20 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
 	}

 	xattr_addr = page_address(xpage);
-	memcpy(xattr_addr, txattr_addr + inline_size, PAGE_SIZE -
-						sizeof(struct node_footer));
+	memcpy(xattr_addr, txattr_addr + inline_size, MAX_XATTR_BLOCK_SIZE);
 	set_page_dirty(xpage);
 	f2fs_put_page(xpage, 1);

-	/* need to checkpoint during fsync */
-	F2FS_I(inode)->xattr_ver = cur_cp_version(F2FS_CKPT(sbi));
 	return 0;
 }

 int f2fs_getxattr(struct inode *inode, int index, const char *name,
 		void *buffer, size_t buffer_size, struct page *ipage)
 {
-	struct f2fs_xattr_entry *entry;
-	void *base_addr;
+	struct f2fs_xattr_entry *entry = NULL;
 	int error = 0;
-	size_t size, len;
+	unsigned int size, len;
+	void *base_addr = NULL;

 	if (name == NULL)
 		return -EINVAL;
@@ -373,21 +476,16 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
 	if (len > F2FS_NAME_LEN)
 		return -ERANGE;

-	error = read_all_xattrs(inode, ipage, &base_addr);
+	error = lookup_all_xattrs(inode, ipage, index, len, name,
+				&entry, &base_addr);
 	if (error)
 		return error;

-	entry = __find_xattr(base_addr, index, len, name);
-	if (IS_XATTR_LAST_ENTRY(entry)) {
-		error = -ENODATA;
-		goto cleanup;
-	}
-
 	size = le16_to_cpu(entry->e_value_size);

 	if (buffer && size > buffer_size) {
 		error = -ERANGE;
-		goto cleanup;
+		goto out;
 	}

 	if (buffer) {
@@ -395,8 +493,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
 		memcpy(buffer, pval, size);
 	}
 	error = size;
-
-cleanup:
+out:
 	kzfree(base_addr);
 	return error;
 }
@@ -445,6 +542,13 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
 	return error;
 }

+static bool f2fs_xattr_value_same(struct f2fs_xattr_entry *entry,
+					const void *value, size_t size)
+{
+	void *pval = entry->e_name + entry->e_name_len;
+	return (entry->e_value_size == size) && !memcmp(pval, value, size);
+}
+
 static int __f2fs_setxattr(struct inode *inode, int index,
 			const char *name, const void *value, size_t size,
 			struct page *ipage, int flags)
@@ -479,12 +583,17 @@ static int __f2fs_setxattr(struct inode *inode, int index,

 	found = IS_XATTR_LAST_ENTRY(here) ? 0 : 1;

-	if ((flags & XATTR_REPLACE) && !found) {
+	if (found) {
+		if ((flags & XATTR_CREATE)) {
+			error = -EEXIST;
+			goto exit;
+		}
+
+		if (f2fs_xattr_value_same(here, value, size))
+			goto exit;
+	} else if ((flags & XATTR_REPLACE)) {
 		error = -ENODATA;
 		goto exit;
-	} else if ((flags & XATTR_CREATE) && found) {
-		error = -EEXIST;
-		goto exit;
 	}

 	last = here;

--- a/fs/f2fs/xattr.h
+++ b/fs/f2fs/xattr.h
@@ -72,9 +72,10 @@ struct f2fs_xattr_entry {
 		for (entry = XATTR_FIRST_ENTRY(addr);\
 				!IS_XATTR_LAST_ENTRY(entry);\
 				entry = XATTR_NEXT_ENTRY(entry))
-
-#define MIN_OFFSET(i)	XATTR_ALIGN(inline_xattr_size(i) + PAGE_SIZE -	\
-				sizeof(struct node_footer) - sizeof(__u32))
+#define MAX_XATTR_BLOCK_SIZE	(PAGE_SIZE - sizeof(struct node_footer))
+#define VALID_XATTR_BLOCK_SIZE	(MAX_XATTR_BLOCK_SIZE - sizeof(__u32))
+#define MIN_OFFSET(i)		XATTR_ALIGN(inline_xattr_size(i) +	\
+						VALID_XATTR_BLOCK_SIZE)

 #define MAX_VALUE_LEN(i)	(MIN_OFFSET(i) -			\
 				sizeof(struct f2fs_xattr_header) -	\

--- a/include/linux/f2fs_fs.h
+++ b/include/linux/f2fs_fs.h
@@ -36,6 +36,12 @@
 #define F2FS_NODE_INO(sbi)	(sbi->node_ino_num)
 #define F2FS_META_INO(sbi)	(sbi->meta_ino_num)

+#define F2FS_IO_SIZE(sbi)	(1 << (sbi)->write_io_size_bits) /* Blocks */
+#define F2FS_IO_SIZE_KB(sbi)	(1 << ((sbi)->write_io_size_bits + 2)) /* KB */
+#define F2FS_IO_SIZE_BYTES(sbi)	(1 << ((sbi)->write_io_size_bits + 12)) /* B */
+#define F2FS_IO_SIZE_BITS(sbi)	((sbi)->write_io_size_bits) /* power of 2 */
+#define F2FS_IO_SIZE_MASK(sbi)	(F2FS_IO_SIZE(sbi) - 1)
+
 /* This flag is used by node and meta inodes, and by recovery */
 #define GFP_F2FS_ZERO		(GFP_NOFS | __GFP_ZERO)
 #define GFP_F2FS_HIGH_ZERO	(GFP_NOFS | __GFP_ZERO | __GFP_HIGHMEM)
@@ -108,6 +114,7 @@ struct f2fs_super_block {
 /*
 * For checkpoint
 */
+#define CP_NAT_BITS_FLAG	0x00000080
 #define CP_CRC_RECOVERY_FLAG	0x00000040
 #define CP_FASTBOOT_FLAG	0x00000020
 #define CP_FSCK_FLAG		0x00000010
@@ -272,6 +279,7 @@ struct f2fs_node {
 * For NAT entries
 */
 #define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry))
+#define NAT_ENTRY_BITMAP_SIZE	((NAT_ENTRY_PER_BLOCK + 7) / 8)

 struct f2fs_nat_entry {
 	__u8 version;		/* latest version of cached nat entry */

--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h