Commit 5160bcce authored by Linus Torvalds

Merge tag 'f2fs-for-5.1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs

Pull f2fs updates from Jaegeuk Kim:
 "We've continued mainly to fix bugs in this round, as f2fs has been
  shipped in more devices. Especially, we've focused on stabilizing
  checkpoint=disable feature, and provided some interfaces for QA.

  Enhancements:
   - expose FS_NOCOW_FL for pin_file
   - run discard jobs at unmount time with timeout
   - tune the discard thread to avoid idling, which consumes power
   - add sanity checks to address vulnerabilities
   - give random value to i_generation
   - shutdown with more flags for QA

  Bug fixes:
   - clean up stale objects when mount fails with checkpoint=disable
   - fix the system getting stuck due to a wrong count from atomic writes
   - handle some corrupted disk cases
   - fix a deadlock in f2fs_read_inline_dir

  We've also added some minor build error fixes and clean-up patches"

* tag 'f2fs-for-5.1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (53 commits)
  f2fs: set pin_file under CAP_SYS_ADMIN
  f2fs: fix to avoid deadlock in f2fs_read_inline_dir()
  f2fs: fix to adapt small inline xattr space in __find_inline_xattr()
  f2fs: fix to do sanity check with inode.i_inline_xattr_size
  f2fs: give some messages for inline_xattr_size
  f2fs: don't trigger read IO for beyond EOF page
  f2fs: fix to add refcount once page is tagged PG_private
  f2fs: remove wrong comment in f2fs_invalidate_page()
  f2fs: fix to use kvfree instead of kzfree
  f2fs: print more parameters in trace_f2fs_map_blocks
  f2fs: trace f2fs_ioc_shutdown
  f2fs: fix to avoid deadlock of atomic file operations
  f2fs: fix to dirty inode for i_mode recovery
  f2fs: give random value to i_generation
  f2fs: no need to take page lock in readdir
  f2fs: fix to update iostat correctly in IPU path
  f2fs: fix encrypted page memory leak
  f2fs: make fault injection covering __submit_flush_wait()
  f2fs: fix to retry fill_super only if recovery failed
  f2fs: silence VM_WARN_ON_ONCE in mempool_alloc
  ...
......@@ -86,6 +86,13 @@ Description:
The unit size is one block; currently only values in the range
[1, 512] can be configured.
What: /sys/fs/f2fs/<disk>/umount_discard_timeout
Date: January 2019
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
Description:
Set timeout to issue discard commands during umount.
Default: 5 secs
What: /sys/fs/f2fs/<disk>/max_victim_search
Date: January 2014
Contact: "Jaegeuk Kim" <jaegeuk.kim@samsung.com>
......
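
As a usage note for the new attribute above: it can be tuned like any other f2fs sysfs knob. Below is a minimal userspace sketch (an editor's illustration, not part of this series), assuming an f2fs volume whose sysfs directory is the hypothetical /sys/fs/f2fs/sdb1:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	/* path follows the documented /sys/fs/f2fs/<disk>/ layout */
	FILE *f = fopen("/sys/fs/f2fs/sdb1/umount_discard_timeout", "w");

	if (!f) {
		perror("umount_discard_timeout");
		return EXIT_FAILURE;
	}
	/* allow up to 10 seconds of discard work at umount (default: 5) */
	fprintf(f, "10\n");
	fclose(f);
	return EXIT_SUCCESS;
}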
......@@ -126,6 +126,8 @@ disable_ext_identify Disable the extension list configured by mkfs, so f2fs
is not aware of cold files such as media files.
inline_xattr Enable the inline xattrs feature.
noinline_xattr Disable the inline xattrs feature.
inline_xattr_size=%u Support configuring the inline xattr size; it depends on
the flexible inline xattr feature.
inline_data Enable the inline data feature: newly created small (<~3.4K)
files can be written into the inode block.
inline_dentry Enable the inline dir feature: data in newly created
......
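
For illustration, the new inline_xattr_size option is passed like any other f2fs mount option. A minimal sketch via mount(2), assuming a hypothetical device and mount point; per the parse_options() changes later in this series, the image must have both the extra_attr and flexible inline xattr features:

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	if (mount("/dev/sdb1", "/mnt/f2fs", "f2fs", 0,
		  "inline_xattr,inline_xattr_size=16") < 0) {
		perror("mount");
		return 1;
	}
	return 0;
}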
......@@ -306,8 +306,9 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
goto skip_write;
/* collect a number of dirty meta pages and write together */
if (wbc->for_kupdate ||
get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
if (wbc->sync_mode != WB_SYNC_ALL &&
get_pages(sbi, F2FS_DIRTY_META) <
nr_pages_to_skip(sbi, META))
goto skip_write;
/* if locked failed, cp will flush dirty pages instead */
......@@ -405,7 +406,7 @@ static int f2fs_set_meta_page_dirty(struct page *page)
if (!PageDirty(page)) {
__set_page_dirty_nobuffers(page);
inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META);
SetPagePrivate(page);
f2fs_set_page_private(page, 0);
f2fs_trace_pid(page);
return 1;
}
......@@ -956,7 +957,7 @@ void f2fs_update_dirty_page(struct inode *inode, struct page *page)
inode_inc_dirty_pages(inode);
spin_unlock(&sbi->inode_lock[type]);
SetPagePrivate(page);
f2fs_set_page_private(page, 0);
f2fs_trace_pid(page);
}
......@@ -1259,10 +1260,17 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
else
__clear_ckpt_flags(ckpt, CP_DISABLED_FLAG);
if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK))
__set_ckpt_flags(ckpt, CP_DISABLED_QUICK_FLAG);
else
__clear_ckpt_flags(ckpt, CP_DISABLED_QUICK_FLAG);
if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH))
__set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
else
__clear_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
/*
* TODO: we count on fsck.f2fs to clear this flag until we figure out
* missing cases which clear it incorrectly.
*/
if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR))
__set_ckpt_flags(ckpt, CP_QUOTA_NEED_FSCK_FLAG);
......
......@@ -301,9 +301,10 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi,
for (; start < F2FS_IO_SIZE(sbi); start++) {
struct page *page =
mempool_alloc(sbi->write_io_dummy,
GFP_NOIO | __GFP_ZERO | __GFP_NOFAIL);
GFP_NOIO | __GFP_NOFAIL);
f2fs_bug_on(sbi, !page);
zero_user_segment(page, 0, PAGE_SIZE);
SetPagePrivate(page);
set_page_private(page, (unsigned long)DUMMY_WRITTEN_PAGE);
lock_page(page);
......@@ -1553,6 +1554,9 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
if (last_block > last_block_in_file)
last_block = last_block_in_file;
/* just zeroing out page which is beyond EOF */
if (block_in_file >= last_block)
goto zero_out;
/*
* Map blocks using the previous result first.
*/
......@@ -1565,16 +1569,11 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
* Then do more f2fs_map_blocks() calls until we are
* done with this page.
*/
map.m_flags = 0;
if (block_in_file < last_block) {
map.m_lblk = block_in_file;
map.m_len = last_block - block_in_file;
map.m_lblk = block_in_file;
map.m_len = last_block - block_in_file;
if (f2fs_map_blocks(inode, &map, 0,
F2FS_GET_BLOCK_DEFAULT))
goto set_error_page;
}
if (f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_DEFAULT))
goto set_error_page;
got_it:
if ((map.m_flags & F2FS_MAP_MAPPED)) {
block_nr = map.m_pblk + block_in_file - map.m_lblk;
......@@ -1589,6 +1588,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
DATA_GENERIC))
goto set_error_page;
} else {
zero_out:
zero_user_segment(page, 0, PAGE_SIZE);
if (!PageUptodate(page))
SetPageUptodate(page);
......@@ -1863,8 +1863,13 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
if (fio->need_lock == LOCK_REQ)
f2fs_unlock_op(fio->sbi);
err = f2fs_inplace_write_data(fio);
if (err && PageWriteback(page))
end_page_writeback(page);
if (err) {
if (f2fs_encrypted_file(inode))
fscrypt_pullback_bio_page(&fio->encrypted_page,
true);
if (PageWriteback(page))
end_page_writeback(page);
}
trace_f2fs_do_write_data_page(fio->page, IPU);
set_inode_flag(inode, FI_UPDATE_WRITE);
return err;
......@@ -2315,7 +2320,8 @@ static void f2fs_write_failed(struct address_space *mapping, loff_t to)
down_write(&F2FS_I(inode)->i_mmap_sem);
truncate_pagecache(inode, i_size);
f2fs_truncate_blocks(inode, i_size, true, true);
if (!IS_NOQUOTA(inode))
f2fs_truncate_blocks(inode, i_size, true);
up_write(&F2FS_I(inode)->i_mmap_sem);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
......@@ -2585,14 +2591,11 @@ static void f2fs_dio_submit_bio(struct bio *bio, struct inode *inode,
{
struct f2fs_private_dio *dio;
bool write = (bio_op(bio) == REQ_OP_WRITE);
int err;
dio = f2fs_kzalloc(F2FS_I_SB(inode),
sizeof(struct f2fs_private_dio), GFP_NOFS);
if (!dio) {
err = -ENOMEM;
if (!dio)
goto out;
}
dio->inode = inode;
dio->orig_end_io = bio->bi_end_io;
......@@ -2710,12 +2713,10 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset,
clear_cold_data(page);
/* This is atomic written page, keep Private */
if (IS_ATOMIC_WRITTEN_PAGE(page))
return f2fs_drop_inmem_page(inode, page);
set_page_private(page, 0);
ClearPagePrivate(page);
f2fs_clear_page_private(page);
}
int f2fs_release_page(struct page *page, gfp_t wait)
......@@ -2729,8 +2730,7 @@ int f2fs_release_page(struct page *page, gfp_t wait)
return 0;
clear_cold_data(page);
set_page_private(page, 0);
ClearPagePrivate(page);
f2fs_clear_page_private(page);
return 1;
}
......@@ -2798,12 +2798,8 @@ int f2fs_migrate_page(struct address_space *mapping,
return -EAGAIN;
}
/*
* A reference is expected if PagePrivate set when move mapping,
* however F2FS breaks this for maintaining dirty page counts when
* truncating pages. So here adjusting the 'extra_count' make it work.
*/
extra_count = (atomic_written ? 1 : 0) - page_has_private(page);
/* one extra reference was held for atomic_write page */
extra_count = atomic_written ? 1 : 0;
rc = migrate_page_move_mapping(mapping, newpage,
page, mode, extra_count);
if (rc != MIGRATEPAGE_SUCCESS) {
......@@ -2824,9 +2820,10 @@ int f2fs_migrate_page(struct address_space *mapping,
get_page(newpage);
}
if (PagePrivate(page))
SetPagePrivate(newpage);
set_page_private(newpage, page_private(page));
if (PagePrivate(page)) {
f2fs_set_page_private(newpage, page_private(page));
f2fs_clear_page_private(page);
}
if (mode != MIGRATE_SYNC_NO_COPY)
migrate_page_copy(newpage, page);
......
......@@ -96,8 +96,10 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->free_secs = free_sections(sbi);
si->prefree_count = prefree_segments(sbi);
si->dirty_count = dirty_segments(sbi);
si->node_pages = NODE_MAPPING(sbi)->nrpages;
si->meta_pages = META_MAPPING(sbi)->nrpages;
if (sbi->node_inode)
si->node_pages = NODE_MAPPING(sbi)->nrpages;
if (sbi->meta_inode)
si->meta_pages = META_MAPPING(sbi)->nrpages;
si->nats = NM_I(sbi)->nat_cnt;
si->dirty_nats = NM_I(sbi)->dirty_nat_cnt;
si->sits = MAIN_SEGS(sbi);
......@@ -175,7 +177,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
static void update_mem_info(struct f2fs_sb_info *sbi)
{
struct f2fs_stat_info *si = F2FS_STAT(sbi);
unsigned npages;
int i;
if (si->base_mem)
......@@ -258,10 +259,14 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
sizeof(struct extent_node);
si->page_mem = 0;
npages = NODE_MAPPING(sbi)->nrpages;
si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
npages = META_MAPPING(sbi)->nrpages;
si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
if (sbi->node_inode) {
unsigned npages = NODE_MAPPING(sbi)->nrpages;
si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
}
if (sbi->meta_inode) {
unsigned npages = META_MAPPING(sbi)->nrpages;
si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
}
}
static int stat_show(struct seq_file *s, void *v)
......
......@@ -728,7 +728,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
!f2fs_truncate_hole(dir, page->index, page->index + 1)) {
f2fs_clear_page_cache_dirty_tag(page);
clear_page_dirty_for_io(page);
ClearPagePrivate(page);
f2fs_clear_page_private(page);
ClearPageUptodate(page);
clear_cold_data(page);
inode_dec_dirty_pages(dir);
......@@ -800,6 +800,10 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
if (de->name_len == 0) {
bit_pos++;
ctx->pos = start_pos + bit_pos;
printk_ratelimited(
"%s, invalid namelen(0), ino:%u, run fsck to fix.",
KERN_WARNING, le32_to_cpu(de->ino));
set_sbi_flag(sbi, SBI_NEED_FSCK);
continue;
}
......@@ -810,7 +814,8 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
/* check memory boundary before moving forward */
bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len));
if (unlikely(bit_pos > d->max)) {
if (unlikely(bit_pos > d->max ||
le16_to_cpu(de->name_len) > F2FS_NAME_LEN)) {
f2fs_msg(sbi->sb, KERN_WARNING,
"%s: corrupted namelen=%d, run fsck to fix.",
__func__, le16_to_cpu(de->name_len));
......@@ -891,7 +896,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
page_cache_sync_readahead(inode->i_mapping, ra, file, n,
min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES));
dentry_page = f2fs_get_lock_data_page(inode, n, false);
dentry_page = f2fs_find_data_page(inode, n);
if (IS_ERR(dentry_page)) {
err = PTR_ERR(dentry_page);
if (err == -ENOENT) {
......@@ -909,11 +914,11 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
err = f2fs_fill_dentries(ctx, &d,
n * NR_DENTRY_IN_BLOCK, &fstr);
if (err) {
f2fs_put_page(dentry_page, 1);
f2fs_put_page(dentry_page, 0);
break;
}
f2fs_put_page(dentry_page, 1);
f2fs_put_page(dentry_page, 0);
}
out_free:
fscrypt_fname_free_buffer(&fstr);
......
......@@ -506,7 +506,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode,
unsigned int end = fofs + len;
unsigned int pos = (unsigned int)fofs;
bool updated = false;
bool leftmost;
bool leftmost = false;
if (!et)
return;
......
......@@ -190,6 +190,8 @@ enum {
#define DEF_CP_INTERVAL 60 /* 60 secs */
#define DEF_IDLE_INTERVAL 5 /* 5 secs */
#define DEF_DISABLE_INTERVAL 5 /* 5 secs */
#define DEF_DISABLE_QUICK_INTERVAL 1 /* 1 secs */
#define DEF_UMOUNT_DISCARD_TIMEOUT 5 /* 5 secs */
struct cp_control {
int reason;
......@@ -253,7 +255,7 @@ struct discard_entry {
/* max discard pend list number */
#define MAX_PLIST_NUM 512
#define plist_idx(blk_num) ((blk_num) >= MAX_PLIST_NUM ? \
(MAX_PLIST_NUM - 1) : (blk_num - 1))
(MAX_PLIST_NUM - 1) : ((blk_num) - 1))
enum {
D_PREP, /* initial */
......@@ -309,6 +311,7 @@ struct discard_policy {
bool sync; /* submit discard with REQ_SYNC flag */
bool ordered; /* issue discard by lba order */
unsigned int granularity; /* discard granularity */
int timeout; /* discard timeout for put_super */
};
struct discard_cmd_control {
......@@ -455,7 +458,6 @@ struct f2fs_flush_device {
/* for inline stuff */
#define DEF_INLINE_RESERVED_SIZE 1
#define DEF_MIN_INLINE_SIZE 1
static inline int get_extra_isize(struct inode *inode);
static inline int get_inline_xattr_addrs(struct inode *inode);
#define MAX_INLINE_DATA(inode) (sizeof(__le32) * \
......@@ -1098,6 +1100,7 @@ enum {
SBI_IS_SHUTDOWN, /* shutdown by ioctl */
SBI_IS_RECOVERED, /* recovered orphan/data */
SBI_CP_DISABLED, /* CP was disabled last mount */
SBI_CP_DISABLED_QUICK, /* CP was disabled quickly */
SBI_QUOTA_NEED_FLUSH, /* need to flush quota info in CP */
SBI_QUOTA_SKIP_FLUSH, /* skip flushing quota in current CP */
SBI_QUOTA_NEED_REPAIR, /* quota file may be corrupted */
......@@ -1109,6 +1112,7 @@ enum {
DISCARD_TIME,
GC_TIME,
DISABLE_TIME,
UMOUNT_DISCARD_TIMEOUT,
MAX_TIME,
};
......@@ -1237,8 +1241,6 @@ struct f2fs_sb_info {
unsigned int nquota_files; /* # of quota sysfile */
u32 s_next_generation; /* for NFS support */
/* # of pages, see count_type */
atomic_t nr_pages[NR_COUNT_TYPE];
/* # of allocated blocks */
......@@ -1798,13 +1800,12 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type)
{
atomic_inc(&sbi->nr_pages[count_type]);
if (count_type == F2FS_DIRTY_DATA || count_type == F2FS_INMEM_PAGES ||
count_type == F2FS_WB_CP_DATA || count_type == F2FS_WB_DATA ||
count_type == F2FS_RD_DATA || count_type == F2FS_RD_NODE ||
count_type == F2FS_RD_META)
return;
set_sbi_flag(sbi, SBI_IS_DIRTY);
if (count_type == F2FS_DIRTY_DENTS ||
count_type == F2FS_DIRTY_NODES ||
count_type == F2FS_DIRTY_META ||
count_type == F2FS_DIRTY_QDATA ||
count_type == F2FS_DIRTY_IMETA)
set_sbi_flag(sbi, SBI_IS_DIRTY);
}
static inline void inode_inc_dirty_pages(struct inode *inode)
......@@ -2156,10 +2157,17 @@ static inline bool is_idle(struct f2fs_sb_info *sbi, int type)
get_pages(sbi, F2FS_RD_META) || get_pages(sbi, F2FS_WB_DATA) ||
get_pages(sbi, F2FS_WB_CP_DATA) ||
get_pages(sbi, F2FS_DIO_READ) ||
get_pages(sbi, F2FS_DIO_WRITE) ||
atomic_read(&SM_I(sbi)->dcc_info->queued_discard) ||
atomic_read(&SM_I(sbi)->fcc_info->queued_flush))
get_pages(sbi, F2FS_DIO_WRITE))
return false;
if (SM_I(sbi) && SM_I(sbi)->dcc_info &&
atomic_read(&SM_I(sbi)->dcc_info->queued_discard))
return false;
if (SM_I(sbi) && SM_I(sbi)->fcc_info &&
atomic_read(&SM_I(sbi)->fcc_info->queued_flush))
return false;
return f2fs_time_over(sbi, type);
}
......@@ -2300,11 +2308,12 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr)
#define F2FS_EXTENTS_FL 0x00080000 /* Inode uses extents */
#define F2FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */
#define F2FS_EOFBLOCKS_FL 0x00400000 /* Blocks allocated beyond EOF */
#define F2FS_NOCOW_FL 0x00800000 /* Do not cow file */
#define F2FS_INLINE_DATA_FL 0x10000000 /* Inode has inline data. */
#define F2FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
#define F2FS_RESERVED_FL 0x80000000 /* reserved for ext4 lib */
#define F2FS_FL_USER_VISIBLE 0x304BDFFF /* User visible flags */
#define F2FS_FL_USER_VISIBLE 0x30CBDFFF /* User visible flags */
#define F2FS_FL_USER_MODIFIABLE 0x204BC0FF /* User modifiable flags */
/* Flags we can manipulate with through F2FS_IOC_FSSETXATTR */
......@@ -2761,9 +2770,9 @@ static inline int get_inline_xattr_addrs(struct inode *inode)
#define F2FS_OLD_ATTRIBUTE_SIZE (offsetof(struct f2fs_inode, i_addr))
#define F2FS_FITS_IN_INODE(f2fs_inode, extra_isize, field) \
((offsetof(typeof(*f2fs_inode), field) + \
((offsetof(typeof(*(f2fs_inode)), field) + \
sizeof((f2fs_inode)->field)) \
<= (F2FS_OLD_ATTRIBUTE_SIZE + extra_isize)) \
<= (F2FS_OLD_ATTRIBUTE_SIZE + (extra_isize))) \
static inline void f2fs_reset_iostat(struct f2fs_sb_info *sbi)
{
......@@ -2792,8 +2801,8 @@ static inline void f2fs_update_iostat(struct f2fs_sb_info *sbi,
#define __is_large_section(sbi) ((sbi)->segs_per_sec > 1)
#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO(fio->type) == META && \
(!is_read_io(fio->op) || fio->is_meta))
#define __is_meta_io(fio) (PAGE_TYPE_OF_BIO((fio)->type) == META && \
(!is_read_io((fio)->op) || (fio)->is_meta))
bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type);
......@@ -2825,13 +2834,33 @@ static inline bool is_valid_data_blkaddr(struct f2fs_sb_info *sbi,
return true;
}
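/*
 * Pair PagePrivate with an extra page reference: setting page->private
 * pins the page (get_page) and clearing it drops the pin (f2fs_put_page),
 * so truncate and page migration observe a consistent refcount once a
 * page is tagged PG_private. (Editor's note summarizing the refcount fix
 * in this series.)
 */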
static inline void f2fs_set_page_private(struct page *page,
unsigned long data)
{
if (PagePrivate(page))
return;
get_page(page);
SetPagePrivate(page);
set_page_private(page, data);
}
static inline void f2fs_clear_page_private(struct page *page)
{
if (!PagePrivate(page))
return;
set_page_private(page, 0);
ClearPagePrivate(page);
f2fs_put_page(page, 0);
}
/*
* file.c
*/
int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);
void f2fs_truncate_data_blocks(struct dnode_of_data *dn);
int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock,
bool buf_write);
int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock);
int f2fs_truncate(struct inode *inode);
int f2fs_getattr(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int flags);
......@@ -3005,7 +3034,7 @@ void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr);
bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi);
void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi);
bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi);
bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi);
void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
struct cp_control *cpc);
void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi);
......@@ -3610,8 +3639,6 @@ extern void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
#define f2fs_build_fault_attr(sbi, rate, type) do { } while (0)
#endif
#endif
static inline bool is_journalled_quota(struct f2fs_sb_info *sbi)
{
#ifdef CONFIG_QUOTA
......@@ -3624,3 +3651,5 @@ static inline bool is_journalled_quota(struct f2fs_sb_info *sbi)
#endif
return false;
}
#endif
......@@ -589,8 +589,7 @@ static int truncate_partial_data_page(struct inode *inode, u64 from,
return 0;
}
int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock,
bool buf_write)
int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct dnode_of_data dn;
......@@ -598,7 +597,6 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock,
int count = 0, err = 0;
struct page *ipage;
bool truncate_page = false;
int flag = buf_write ? F2FS_GET_BLOCK_PRE_AIO : F2FS_GET_BLOCK_PRE_DIO;
trace_f2fs_truncate_blocks_enter(inode, from);
......@@ -608,7 +606,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock,
goto free_partial;
if (lock)
__do_map_lock(sbi, flag, true);
f2fs_lock_op(sbi);
ipage = f2fs_get_node_page(sbi, inode->i_ino);
if (IS_ERR(ipage)) {
......@@ -646,7 +644,7 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock,
err = f2fs_truncate_inode_blocks(inode, free_from);
out:
if (lock)
__do_map_lock(sbi, flag, false);
f2fs_unlock_op(sbi);
free_partial:
/* lastly zero out the first data page */
if (!err)
......@@ -681,7 +679,7 @@ int f2fs_truncate(struct inode *inode)
return err;
}
err = f2fs_truncate_blocks(inode, i_size_read(inode), true, false);
err = f2fs_truncate_blocks(inode, i_size_read(inode), true);
if (err)
return err;
......@@ -768,7 +766,6 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
int err;
bool size_changed = false;
if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
return -EIO;
......@@ -843,8 +840,6 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
down_write(&F2FS_I(inode)->i_sem);
F2FS_I(inode)->last_disk_size = i_size_read(inode);
up_write(&F2FS_I(inode)->i_sem);
size_changed = true;
}
__setattr_copy(inode, attr);
......@@ -858,7 +853,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
}
/* file size may changed here */
f2fs_mark_inode_dirty_sync(inode, size_changed);
f2fs_mark_inode_dirty_sync(inode, true);
/* inode change will produce dirty node pages flushed by checkpoint */
f2fs_balance_fs(F2FS_I_SB(inode), true);
......@@ -1262,7 +1257,7 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
new_size = i_size_read(inode) - len;
truncate_pagecache(inode, new_size);
ret = f2fs_truncate_blocks(inode, new_size, true, false);
ret = f2fs_truncate_blocks(inode, new_size, true);
up_write(&F2FS_I(inode)->i_mmap_sem);
if (!ret)
f2fs_i_size_write(inode, new_size);
......@@ -1447,7 +1442,7 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
f2fs_balance_fs(sbi, true);
down_write(&F2FS_I(inode)->i_mmap_sem);
ret = f2fs_truncate_blocks(inode, i_size_read(inode), true, false);
ret = f2fs_truncate_blocks(inode, i_size_read(inode), true);
up_write(&F2FS_I(inode)->i_mmap_sem);
if (ret)
return ret;
......@@ -1651,6 +1646,8 @@ static int f2fs_ioc_getflags(struct file *filp, unsigned long arg)
flags |= F2FS_ENCRYPT_FL;
if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode))
flags |= F2FS_INLINE_DATA_FL;
if (is_inode_flag_set(inode, FI_PIN_FILE))
flags |= F2FS_NOCOW_FL;
flags &= F2FS_FL_USER_VISIBLE;
......@@ -1750,10 +1747,12 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
if (!get_dirty_pages(inode))
goto skip_flush;
f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING,
/*
* Should wait end_io to count F2FS_WB_CP_DATA correctly by
* f2fs_is_atomic_file.
*/
if (get_dirty_pages(inode))
f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING,
"Unexpected flush for atomic writes: ino=%lu, npages=%u",
inode->i_ino, get_dirty_pages(inode));
ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
......@@ -1761,7 +1760,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
goto out;
}
skip_flush:
set_inode_flag(inode, FI_ATOMIC_FILE);
clear_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
......@@ -1968,11 +1967,11 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
break;
case F2FS_GOING_DOWN_NEED_FSCK:
set_sbi_flag(sbi, SBI_NEED_FSCK);
set_sbi_flag(sbi, SBI_CP_DISABLED_QUICK);
set_sbi_flag(sbi, SBI_IS_DIRTY);
/* do checkpoint only */
ret = f2fs_sync_fs(sb, 1);
if (ret)
goto out;
break;
goto out;
default:
ret = -EINVAL;
goto out;
......@@ -1988,6 +1987,9 @@ static int f2fs_ioc_shutdown(struct file *filp, unsigned long arg)
out:
if (in != F2FS_GOING_DOWN_FULLSYNC)
mnt_drop_write_file(filp);
trace_f2fs_shutdown(sbi, in, ret);
return ret;
}
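
For QA, the extended shutdown modes above can be driven from userspace through F2FS_IOC_SHUTDOWN. A hedged sketch follows (editor's illustration); the ioctl number and the F2FS_GOING_DOWN_NEED_FSCK value are assumed to mirror fs/f2fs/f2fs.h in this series, and the mount point is hypothetical, so verify both against the headers you build with:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/types.h>

#define F2FS_IOC_SHUTDOWN		_IOR('X', 125, __u32)
#define F2FS_GOING_DOWN_NEED_FSCK	0x4	/* new mode in this series */

int main(void)
{
	__u32 mode = F2FS_GOING_DOWN_NEED_FSCK;
	int fd = open("/mnt/f2fs", O_RDONLY);	/* hypothetical mount point */

	if (fd < 0 || ioctl(fd, F2FS_IOC_SHUTDOWN, &mode) < 0) {
		perror("F2FS_IOC_SHUTDOWN");
		return 1;
	}
	close(fd);
	return 0;
}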
......@@ -2871,8 +2873,8 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
__u32 pin;
int ret = 0;
if (!inode_owner_or_capable(inode))
return -EACCES;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
if (get_user(pin, (__u32 __user *)arg))
return -EFAULT;
......
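
Similarly, the pin_file changes above (the CAP_SYS_ADMIN check plus the FS_NOCOW_FL exposure in f2fs_ioc_getflags) can be exercised with a small tool. A sketch under the assumption that the ioctl definitions match fs/f2fs/f2fs.h in this series; the file path is hypothetical:

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>
#include <linux/types.h>

#define F2FS_IOCTL_MAGIC	0xf5
#define F2FS_IOC_SET_PIN_FILE	_IOW(F2FS_IOCTL_MAGIC, 13, __u32)
#define F2FS_NOCOW_FL		0x00800000	/* mirrors the flag added above */

int main(void)
{
	__u32 pin = 1;
	int flags = 0;
	int fd = open("/mnt/f2fs/pinned.img", O_RDWR);	/* hypothetical file */

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* requires CAP_SYS_ADMIN after this series */
	if (ioctl(fd, F2FS_IOC_SET_PIN_FILE, &pin) < 0)
		perror("F2FS_IOC_SET_PIN_FILE");
	else if (ioctl(fd, FS_IOC_GETFLAGS, &flags) == 0)
		printf("nocow: %d\n", !!(flags & F2FS_NOCOW_FL));
	close(fd);
	return 0;
}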
......@@ -298,7 +298,7 @@ bool f2fs_recover_inline_data(struct inode *inode, struct page *npage)
clear_inode_flag(inode, FI_INLINE_DATA);
f2fs_put_page(ipage, 1);
} else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) {
if (f2fs_truncate_blocks(inode, 0, false, false))
if (f2fs_truncate_blocks(inode, 0, false))
return false;
goto process_inline;
}
......@@ -470,7 +470,7 @@ static int f2fs_add_inline_entries(struct inode *dir, void *inline_dentry)
return 0;
punch_dentry_pages:
truncate_inode_pages(&dir->i_data, 0);
f2fs_truncate_blocks(dir, 0, false, false);
f2fs_truncate_blocks(dir, 0, false);
f2fs_remove_dirty_inode(dir);
return err;
}
......@@ -659,6 +659,12 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx,
if (IS_ERR(ipage))
return PTR_ERR(ipage);
/*
* f2fs_readdir was protected by inode.i_rwsem, it is safe to access
* ipage without page's lock held.
*/
unlock_page(ipage);
inline_dentry = inline_data_addr(inode, ipage);
make_dentry_ptr_inline(inode, &d, inline_dentry);
......@@ -667,7 +673,7 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx,
if (!err)
ctx->pos = d.max;
f2fs_put_page(ipage, 1);
f2fs_put_page(ipage, 0);
return err < 0 ? err : 0;
}
......
......@@ -14,6 +14,7 @@
#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "xattr.h"
#include <trace/events/f2fs.h>
......@@ -248,6 +249,20 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
return false;
}
if (f2fs_has_extra_attr(inode) &&
f2fs_sb_has_flexible_inline_xattr(sbi) &&
f2fs_has_inline_xattr(inode) &&
(!fi->i_inline_xattr_size ||
fi->i_inline_xattr_size > MAX_INLINE_XATTR_SIZE)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_msg(sbi->sb, KERN_WARNING,
"%s: inode (ino=%lx) has corrupted "
"i_inline_xattr_size: %d, max: %zu",
__func__, inode->i_ino, fi->i_inline_xattr_size,
MAX_INLINE_XATTR_SIZE);
return false;
}
if (F2FS_I(inode)->extent_tree) {
struct extent_info *ei = &F2FS_I(inode)->extent_tree->largest;
......
......@@ -10,6 +10,7 @@
#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/ctype.h>
#include <linux/random.h>
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/quotaops.h>
......@@ -50,7 +51,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
inode->i_blocks = 0;
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
F2FS_I(inode)->i_crtime = inode->i_mtime;
inode->i_generation = sbi->s_next_generation++;
inode->i_generation = prandom_u32();
if (S_ISDIR(inode->i_mode))
F2FS_I(inode)->i_current_depth = 1;
......
......@@ -1920,7 +1920,9 @@ static int f2fs_write_node_pages(struct address_space *mapping,
f2fs_balance_fs_bg(sbi);
/* collect a number of dirty node pages and write together */
if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE))
if (wbc->sync_mode != WB_SYNC_ALL &&
get_pages(sbi, F2FS_DIRTY_NODES) <
nr_pages_to_skip(sbi, NODE))
goto skip_write;
if (wbc->sync_mode == WB_SYNC_ALL)
......@@ -1959,7 +1961,7 @@ static int f2fs_set_node_page_dirty(struct page *page)
if (!PageDirty(page)) {
__set_page_dirty_nobuffers(page);
inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
SetPagePrivate(page);
f2fs_set_page_private(page, 0);
f2fs_trace_pid(page);
return 1;
}
......
......@@ -191,8 +191,7 @@ void f2fs_register_inmem_page(struct inode *inode, struct page *page)
f2fs_trace_pid(page);
set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
SetPagePrivate(page);
f2fs_set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
......@@ -215,7 +214,8 @@ void f2fs_register_inmem_page(struct inode *inode, struct page *page)
}
static int __revoke_inmem_pages(struct inode *inode,
struct list_head *head, bool drop, bool recover)
struct list_head *head, bool drop, bool recover,
bool trylock)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct inmem_pages *cur, *tmp;
......@@ -227,7 +227,16 @@ static int __revoke_inmem_pages(struct inode *inode,
if (drop)
trace_f2fs_commit_inmem_page(page, INMEM_DROP);
lock_page(page);
if (trylock) {
/*
* to avoid deadlock in between page lock and
* inmem_lock.
*/
if (!trylock_page(page))
continue;
} else {
lock_page(page);
}
f2fs_wait_on_page_writeback(page, DATA, true, true);
......@@ -270,8 +279,7 @@ static int __revoke_inmem_pages(struct inode *inode,
ClearPageUptodate(page);
clear_cold_data(page);
}
set_page_private(page, 0);
ClearPagePrivate(page);
f2fs_clear_page_private(page);
f2fs_put_page(page, 1);
list_del(&cur->list);
......@@ -318,13 +326,19 @@ void f2fs_drop_inmem_pages(struct inode *inode)
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
mutex_lock(&fi->inmem_lock);
__revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
if (!list_empty(&fi->inmem_ilist))
list_del_init(&fi->inmem_ilist);
spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
mutex_unlock(&fi->inmem_lock);
while (!list_empty(&fi->inmem_pages)) {
mutex_lock(&fi->inmem_lock);
__revoke_inmem_pages(inode, &fi->inmem_pages,
true, false, true);
if (list_empty(&fi->inmem_pages)) {
spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
if (!list_empty(&fi->inmem_ilist))
list_del_init(&fi->inmem_ilist);
spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
}
mutex_unlock(&fi->inmem_lock);
}
clear_inode_flag(inode, FI_ATOMIC_FILE);
fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
......@@ -354,8 +368,7 @@ void f2fs_drop_inmem_page(struct inode *inode, struct page *page)
kmem_cache_free(inmem_entry_slab, cur);
ClearPageUptodate(page);
set_page_private(page, 0);
ClearPagePrivate(page);
f2fs_clear_page_private(page);
f2fs_put_page(page, 0);
trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE);
......@@ -429,12 +442,15 @@ static int __f2fs_commit_inmem_pages(struct inode *inode)
* recovery or rewrite & commit last transaction. For other
* error number, revoking was done by filesystem itself.
*/
err = __revoke_inmem_pages(inode, &revoke_list, false, true);
err = __revoke_inmem_pages(inode, &revoke_list,
false, true, false);
/* drop all uncommitted pages */
__revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
__revoke_inmem_pages(inode, &fi->inmem_pages,
true, false, false);
} else {
__revoke_inmem_pages(inode, &revoke_list, false, false);
__revoke_inmem_pages(inode, &revoke_list,
false, false, false);
}
return err;
......@@ -542,9 +558,13 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
static int __submit_flush_wait(struct f2fs_sb_info *sbi,
struct block_device *bdev)
{
struct bio *bio = f2fs_bio_alloc(sbi, 0, true);
struct bio *bio;
int ret;
bio = f2fs_bio_alloc(sbi, 0, false);
if (!bio)
return -ENOMEM;
bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
bio_set_dev(bio, bdev);
ret = submit_bio_wait(bio);
......@@ -868,6 +888,9 @@ int f2fs_disable_cp_again(struct f2fs_sb_info *sbi)
if (holes[DATA] > ovp || holes[NODE] > ovp)
return -EAGAIN;
if (is_sbi_flag_set(sbi, SBI_CP_DISABLED_QUICK) &&
dirty_segments(sbi) > overprovision_segments(sbi))
return -EAGAIN;
return 0;
}
......@@ -1037,6 +1060,7 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
dpolicy->io_aware_gran = MAX_PLIST_NUM;
dpolicy->timeout = 0;
if (discard_type == DPOLICY_BG) {
dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
......@@ -1059,6 +1083,8 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
} else if (discard_type == DPOLICY_UMOUNT) {
dpolicy->max_requests = UINT_MAX;
dpolicy->io_aware = false;
/* we need to issue all to keep CP_TRIMMED_FLAG */
dpolicy->granularity = 1;
}
}
......@@ -1424,7 +1450,14 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
int i, issued = 0;
bool io_interrupted = false;
if (dpolicy->timeout != 0)
f2fs_update_time(sbi, dpolicy->timeout);
for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
if (dpolicy->timeout != 0 &&
f2fs_time_over(sbi, dpolicy->timeout))
break;
if (i + 1 < dpolicy->granularity)
break;
......@@ -1611,7 +1644,7 @@ void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi)
}
/* This comes from f2fs_put_super */
bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
{
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct discard_policy dpolicy;
......@@ -1619,6 +1652,7 @@ bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
__init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
dcc->discard_granularity);
dpolicy.timeout = UMOUNT_DISCARD_TIMEOUT;
__issue_discard_cmd(sbi, &dpolicy);
dropped = __drop_discard_cmd(sbi);
......@@ -3164,10 +3198,10 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio)
stat_inc_inplace_blocks(fio->sbi);
err = f2fs_submit_page_bio(fio);
if (!err)
if (!err) {
update_device_state(fio);
f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
}
return err;
}
......
......@@ -865,7 +865,7 @@ static inline void wake_up_discard_thread(struct f2fs_sb_info *sbi, bool force)
}
}
mutex_unlock(&dcc->cmd_lock);
if (!wakeup)
if (!wakeup || !is_idle(sbi, DISCARD_TIME))
return;
wake_up:
dcc->discard_wake = 1;
......
......@@ -269,7 +269,7 @@ static int f2fs_set_qf_name(struct super_block *sb, int qtype,
if (!qname) {
f2fs_msg(sb, KERN_ERR,
"Not enough memory for storing quotafile name");
return -EINVAL;
return -ENOMEM;
}
if (F2FS_OPTION(sbi).s_qf_names[qtype]) {
if (strcmp(F2FS_OPTION(sbi).s_qf_names[qtype], qname) == 0)
......@@ -586,7 +586,7 @@ static int parse_options(struct super_block *sb, char *options)
case Opt_io_size_bits:
if (args->from && match_int(args, &arg))
return -EINVAL;
if (arg > __ilog2_u32(BIO_MAX_PAGES)) {
if (arg <= 0 || arg > __ilog2_u32(BIO_MAX_PAGES)) {
f2fs_msg(sb, KERN_WARNING,
"Not support %d, larger than %d",
1 << arg, BIO_MAX_PAGES);
......@@ -821,6 +821,8 @@ static int parse_options(struct super_block *sb, char *options)
}
if (test_opt(sbi, INLINE_XATTR_SIZE)) {
int min_size, max_size;
if (!f2fs_sb_has_extra_attr(sbi) ||
!f2fs_sb_has_flexible_inline_xattr(sbi)) {
f2fs_msg(sb, KERN_ERR,
......@@ -834,14 +836,15 @@ static int parse_options(struct super_block *sb, char *options)
"set with inline_xattr option");
return -EINVAL;
}
if (!F2FS_OPTION(sbi).inline_xattr_size ||
F2FS_OPTION(sbi).inline_xattr_size >=
DEF_ADDRS_PER_INODE -
F2FS_TOTAL_EXTRA_ATTR_SIZE -
DEF_INLINE_RESERVED_SIZE -
DEF_MIN_INLINE_SIZE) {
min_size = sizeof(struct f2fs_xattr_header) / sizeof(__le32);
max_size = MAX_INLINE_XATTR_SIZE;
if (F2FS_OPTION(sbi).inline_xattr_size < min_size ||
F2FS_OPTION(sbi).inline_xattr_size > max_size) {
f2fs_msg(sb, KERN_ERR,
"inline xattr size is out of range");
"inline xattr size is out of range: %d ~ %d",
min_size, max_size);
return -EINVAL;
}
}
......@@ -915,6 +918,10 @@ static int f2fs_drop_inode(struct inode *inode)
sb_start_intwrite(inode->i_sb);
f2fs_i_size_write(inode, 0);
f2fs_submit_merged_write_cond(F2FS_I_SB(inode),
inode, NULL, 0, DATA);
truncate_inode_pages_final(inode->i_mapping);
if (F2FS_HAS_BLOCKS(inode))
f2fs_truncate(inode);
......@@ -1048,7 +1055,7 @@ static void f2fs_put_super(struct super_block *sb)
}
/* be sure to wait for any on-going discard commands */
dropped = f2fs_wait_discard_bios(sbi);
dropped = f2fs_issue_discard_timeout(sbi);
if ((f2fs_hw_support_discard(sbi) || f2fs_hw_should_discard(sbi)) &&
!sbi->discard_blks && !dropped) {
......@@ -1075,7 +1082,10 @@ static void f2fs_put_super(struct super_block *sb)
f2fs_bug_on(sbi, sbi->fsync_node_num);
iput(sbi->node_inode);
sbi->node_inode = NULL;
iput(sbi->meta_inode);
sbi->meta_inode = NULL;
/*
* iput() can update stat information, if f2fs_write_checkpoint()
......@@ -1455,9 +1465,16 @@ static int f2fs_enable_quotas(struct super_block *sb);
static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
{
unsigned int s_flags = sbi->sb->s_flags;
struct cp_control cpc;
int err;
int err = 0;
int ret;
if (s_flags & SB_RDONLY) {
f2fs_msg(sbi->sb, KERN_ERR,
"checkpoint=disable on readonly fs");
return -EINVAL;
}
sbi->sb->s_flags |= SB_ACTIVE;
f2fs_update_time(sbi, DISABLE_TIME);
......@@ -1465,18 +1482,24 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
while (!f2fs_time_over(sbi, DISABLE_TIME)) {
mutex_lock(&sbi->gc_mutex);
err = f2fs_gc(sbi, true, false, NULL_SEGNO);
if (err == -ENODATA)
if (err == -ENODATA) {
err = 0;
break;
}
if (err && err != -EAGAIN)
return err;
break;
}
err = sync_filesystem(sbi->sb);
if (err)
return err;
ret = sync_filesystem(sbi->sb);
if (ret || err) {
err = ret ? ret: err;
goto restore_flag;
}
if (f2fs_disable_cp_again(sbi))
return -EAGAIN;
if (f2fs_disable_cp_again(sbi)) {
err = -EAGAIN;
goto restore_flag;
}
mutex_lock(&sbi->gc_mutex);
cpc.reason = CP_PAUSE;
......@@ -1485,7 +1508,9 @@ static int f2fs_disable_checkpoint(struct f2fs_sb_info *sbi)
sbi->unusable_block_count = 0;
mutex_unlock(&sbi->gc_mutex);
return 0;
restore_flag:
sbi->sb->s_flags = s_flags; /* Restore MS_RDONLY status */
return err;
}
static void f2fs_enable_checkpoint(struct f2fs_sb_info *sbi)
......@@ -2023,6 +2048,12 @@ void f2fs_quota_off_umount(struct super_block *sb)
set_sbi_flag(F2FS_SB(sb), SBI_QUOTA_NEED_REPAIR);
}
}
/*
* In case of checkpoint=disable, we must flush quota blocks.
* This can cause NULL exception for node_inode in end_io, since
* put_super already dropped it.
*/
sync_filesystem(sb);
}
static void f2fs_truncate_quota_inode_pages(struct super_block *sb)
......@@ -2703,6 +2734,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
sbi->interval_time[DISCARD_TIME] = DEF_IDLE_INTERVAL;
sbi->interval_time[GC_TIME] = DEF_IDLE_INTERVAL;
sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_INTERVAL;
sbi->interval_time[UMOUNT_DISCARD_TIMEOUT] =
DEF_UMOUNT_DISCARD_TIMEOUT;
clear_sbi_flag(sbi, SBI_NEED_FSCK);
for (i = 0; i < NR_COUNT_TYPE; i++)
......@@ -3022,10 +3055,11 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
struct f2fs_super_block *raw_super;
struct inode *root;
int err;
bool retry = true, need_fsck = false;
bool skip_recovery = false, need_fsck = false;
char *options = NULL;
int recovery, i, valid_super_block;
struct curseg_info *seg_i;
int retry_cnt = 1;
try_onemore:
err = -EINVAL;
......@@ -3097,7 +3131,6 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_maxbytes = sbi->max_file_blocks <<
le32_to_cpu(raw_super->log_blocksize);
sb->s_max_links = F2FS_LINK_MAX;
get_random_bytes(&sbi->s_next_generation, sizeof(u32));
#ifdef CONFIG_QUOTA
sb->dq_op = &f2fs_quota_operations;
......@@ -3200,6 +3233,10 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_QUOTA_NEED_FSCK_FLAG))
set_sbi_flag(sbi, SBI_QUOTA_NEED_REPAIR);
if (__is_set_ckpt_flags(F2FS_CKPT(sbi), CP_DISABLED_QUICK_FLAG)) {
set_sbi_flag(sbi, SBI_CP_DISABLED_QUICK);
sbi->interval_time[DISABLE_TIME] = DEF_DISABLE_QUICK_INTERVAL;
}
/* Initialize device list */
err = f2fs_scan_devices(sbi);
......@@ -3288,7 +3325,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_root = d_make_root(root); /* allocate root dentry */
if (!sb->s_root) {
err = -ENOMEM;
goto free_root_inode;
goto free_node_inode;
}
err = f2fs_register_sysfs(sbi);
......@@ -3310,7 +3347,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
goto free_meta;
if (unlikely(is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)))
goto skip_recovery;
goto reset_checkpoint;
/* recover fsynced data */
if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) {
......@@ -3327,11 +3364,13 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
if (need_fsck)
set_sbi_flag(sbi, SBI_NEED_FSCK);
if (!retry)
goto skip_recovery;
if (skip_recovery)
goto reset_checkpoint;
err = f2fs_recover_fsync_data(sbi, false);
if (err < 0) {
if (err != -ENOMEM)
skip_recovery = true;
need_fsck = true;
f2fs_msg(sb, KERN_ERR,
"Cannot recover all fsync data errno=%d", err);
......@@ -3347,14 +3386,14 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
goto free_meta;
}
}
skip_recovery:
reset_checkpoint:
/* f2fs_recover_fsync_data() cleared this already */
clear_sbi_flag(sbi, SBI_POR_DOING);
if (test_opt(sbi, DISABLE_CHECKPOINT)) {
err = f2fs_disable_checkpoint(sbi);
if (err)
goto free_meta;
goto sync_free_meta;
} else if (is_set_ckpt_flags(sbi, CP_DISABLED_FLAG)) {
f2fs_enable_checkpoint(sbi);
}
......@@ -3367,7 +3406,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
/* After POR, we can run background GC thread.*/
err = f2fs_start_gc_thread(sbi);
if (err)
goto free_meta;
goto sync_free_meta;
}
kvfree(options);
......@@ -3387,8 +3426,14 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
cur_cp_version(F2FS_CKPT(sbi)));
f2fs_update_time(sbi, CP_TIME);
f2fs_update_time(sbi, REQ_TIME);
clear_sbi_flag(sbi, SBI_CP_DISABLED_QUICK);
return 0;
sync_free_meta:
/* safe to flush all the data */
sync_filesystem(sbi->sb);
retry_cnt = 0;
free_meta:
#ifdef CONFIG_QUOTA
f2fs_truncate_quota_inode_pages(sb);
......@@ -3402,6 +3447,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
* falls into an infinite loop in f2fs_sync_meta_pages().
*/
truncate_inode_pages_final(META_MAPPING(sbi));
/* evict some inodes being cached by GC */
evict_inodes(sb);
f2fs_unregister_sysfs(sbi);
free_root_inode:
dput(sb->s_root);
......@@ -3410,6 +3457,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
f2fs_release_ino_entry(sbi, true);
truncate_inode_pages_final(NODE_MAPPING(sbi));
iput(sbi->node_inode);
sbi->node_inode = NULL;
free_stats:
f2fs_destroy_stats(sbi);
free_nm:
......@@ -3422,6 +3470,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
free_meta_inode:
make_bad_inode(sbi->meta_inode);
iput(sbi->meta_inode);
sbi->meta_inode = NULL;
free_io_dummy:
mempool_destroy(sbi->write_io_dummy);
free_percpu:
......@@ -3443,8 +3492,8 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
kvfree(sbi);
/* give only one another chance */
if (retry) {
retry = false;
if (retry_cnt > 0 && skip_recovery) {
retry_cnt--;
shrink_dcache_sb(sb);
goto try_onemore;
}
......
......@@ -222,6 +222,8 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
#ifdef CONFIG_F2FS_FAULT_INJECTION
if (a->struct_type == FAULT_INFO_TYPE && t >= (1 << FAULT_MAX))
return -EINVAL;
if (a->struct_type == FAULT_INFO_RATE && t >= UINT_MAX)
return -EINVAL;
#endif
if (a->struct_type == RESERVED_BLOCKS) {
spin_lock(&sbi->stat_lock);
......@@ -278,10 +280,16 @@ static ssize_t __sbi_store(struct f2fs_attr *a,
return count;
}
*ui = t;
if (!strcmp(a->attr.name, "iostat_enable") && *ui == 0)
f2fs_reset_iostat(sbi);
if (!strcmp(a->attr.name, "iostat_enable")) {
sbi->iostat_enable = !!t;
if (!sbi->iostat_enable)
f2fs_reset_iostat(sbi);
return count;
}
*ui = (unsigned int)t;
return count;
}
......@@ -418,6 +426,8 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, discard_idle_interval,
interval_time[DISCARD_TIME]);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_idle_interval, interval_time[GC_TIME]);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info,
umount_discard_timeout, interval_time[UMOUNT_DISCARD_TIMEOUT]);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold);
......@@ -475,6 +485,7 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(idle_interval),
ATTR_LIST(discard_idle_interval),
ATTR_LIST(gc_idle_interval),
ATTR_LIST(umount_discard_timeout),
ATTR_LIST(iostat_enable),
ATTR_LIST(readdir_ra),
ATTR_LIST(gc_pin_file_thresh),
......
......@@ -14,7 +14,7 @@
#include "trace.h"
static RADIX_TREE(pids, GFP_ATOMIC);
static struct mutex pids_lock;
static spinlock_t pids_lock;
static struct last_io_info last_io;
static inline void __print_last_io(void)
......@@ -58,23 +58,29 @@ void f2fs_trace_pid(struct page *page)
set_page_private(page, (unsigned long)pid);
retry:
if (radix_tree_preload(GFP_NOFS))
return;
mutex_lock(&pids_lock);
spin_lock(&pids_lock);
p = radix_tree_lookup(&pids, pid);
if (p == current)
goto out;
if (p)
radix_tree_delete(&pids, pid);
f2fs_radix_tree_insert(&pids, pid, current);
if (radix_tree_insert(&pids, pid, current)) {
spin_unlock(&pids_lock);
radix_tree_preload_end();
cond_resched();
goto retry;
}
trace_printk("%3x:%3x %4x %-16s\n",
MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev),
pid, current->comm);
out:
mutex_unlock(&pids_lock);
spin_unlock(&pids_lock);
radix_tree_preload_end();
}
......@@ -119,7 +125,7 @@ void f2fs_trace_ios(struct f2fs_io_info *fio, int flush)
void f2fs_build_trace_ios(void)
{
mutex_init(&pids_lock);
spin_lock_init(&pids_lock);
}
#define PIDVEC_SIZE 128
......@@ -147,7 +153,7 @@ void f2fs_destroy_trace_ios(void)
pid_t next_pid = 0;
unsigned int found;
mutex_lock(&pids_lock);
spin_lock(&pids_lock);
while ((found = gang_lookup_pids(pid, next_pid, PIDVEC_SIZE))) {
unsigned idx;
......@@ -155,5 +161,5 @@ void f2fs_destroy_trace_ios(void)
for (idx = 0; idx < found; idx++)
radix_tree_delete(&pids, pid[idx]);
}
mutex_unlock(&pids_lock);
spin_unlock(&pids_lock);
}
......@@ -224,11 +224,11 @@ static struct f2fs_xattr_entry *__find_inline_xattr(struct inode *inode,
{
struct f2fs_xattr_entry *entry;
unsigned int inline_size = inline_xattr_size(inode);
void *max_addr = base_addr + inline_size;
list_for_each_xattr(entry, base_addr) {
if ((void *)entry + sizeof(__u32) > base_addr + inline_size ||
(void *)XATTR_NEXT_ENTRY(entry) + sizeof(__u32) >
base_addr + inline_size) {
if ((void *)entry + sizeof(__u32) > max_addr ||
(void *)XATTR_NEXT_ENTRY(entry) > max_addr) {
*last_addr = entry;
return NULL;
}
......@@ -239,6 +239,13 @@ static struct f2fs_xattr_entry *__find_inline_xattr(struct inode *inode,
if (!memcmp(entry->e_name, name, len))
break;
}
/* inline xattr header or entry across max inline xattr size */
if (IS_XATTR_LAST_ENTRY(entry) &&
(void *)entry + sizeof(__u32) > max_addr) {
*last_addr = entry;
return NULL;
}
return entry;
}
......@@ -340,7 +347,7 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage,
*base_addr = txattr_addr;
return 0;
out:
kzfree(txattr_addr);
kvfree(txattr_addr);
return err;
}
......@@ -383,7 +390,7 @@ static int read_all_xattrs(struct inode *inode, struct page *ipage,
*base_addr = txattr_addr;
return 0;
fail:
kzfree(txattr_addr);
kvfree(txattr_addr);
return err;
}
......@@ -510,7 +517,7 @@ int f2fs_getxattr(struct inode *inode, int index, const char *name,
}
error = size;
out:
kzfree(base_addr);
kvfree(base_addr);
return error;
}
......@@ -538,7 +545,7 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
if (!handler || (handler->list && !handler->list(dentry)))
continue;
prefix = handler->prefix ?: handler->name;
prefix = xattr_prefix(handler);
prefix_len = strlen(prefix);
size = prefix_len + entry->e_name_len + 1;
if (buffer) {
......@@ -556,7 +563,7 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
}
error = buffer_size - rest;
cleanup:
kzfree(base_addr);
kvfree(base_addr);
return error;
}
......@@ -687,7 +694,7 @@ static int __f2fs_setxattr(struct inode *inode, int index,
if (!error && S_ISDIR(inode->i_mode))
set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_CP);
exit:
kzfree(base_addr);
kvfree(base_addr);
return error;
}
......
......@@ -78,6 +78,12 @@ struct f2fs_xattr_entry {
sizeof(struct f2fs_xattr_header) - \
sizeof(struct f2fs_xattr_entry))
#define MAX_INLINE_XATTR_SIZE \
(DEF_ADDRS_PER_INODE - \
F2FS_TOTAL_EXTRA_ATTR_SIZE / sizeof(__le32) - \
DEF_INLINE_RESERVED_SIZE - \
MIN_INLINE_DENTRY_SIZE / sizeof(__le32))
/*
* On-disk structure of f2fs_xattr
* We use inline xattrs space + 1 block for xattr.
......
......@@ -116,6 +116,7 @@ struct f2fs_super_block {
/*
* For checkpoint
*/
#define CP_DISABLED_QUICK_FLAG 0x00002000
#define CP_DISABLED_FLAG 0x00001000
#define CP_QUOTA_NEED_FSCK_FLAG 0x00000800
#define CP_LARGE_NAT_BITMAP_FLAG 0x00000400
......@@ -186,7 +187,7 @@ struct f2fs_orphan_block {
struct f2fs_extent {
__le32 fofs; /* start file offset of the extent */
__le32 blk; /* start block address of the extent */
__le32 len; /* lengh of the extent */
__le32 len; /* length of the extent */
} __packed;
#define F2FS_NAME_LEN 255
......@@ -284,7 +285,7 @@ enum {
struct node_footer {
__le32 nid; /* node id */
__le32 ino; /* inode nunmber */
__le32 ino; /* inode number */
__le32 flag; /* include cold/fsync/dentry marks and offset */
__le64 cp_ver; /* checkpoint version */
__le32 next_blkaddr; /* next node page block address */
......@@ -489,12 +490,12 @@ typedef __le32 f2fs_hash_t;
/*
* space utilization of regular dentry and inline dentry (w/o extra reservation)
* regular dentry inline dentry
* bitmap 1 * 27 = 27 1 * 23 = 23
* reserved 1 * 3 = 3 1 * 7 = 7
* dentry 11 * 214 = 2354 11 * 182 = 2002
* filename 8 * 214 = 1712 8 * 182 = 1456
* total 4096 3488
* regular dentry inline dentry (def) inline dentry (min)
* bitmap 1 * 27 = 27 1 * 23 = 23 1 * 1 = 1
* reserved 1 * 3 = 3 1 * 7 = 7 1 * 1 = 1
* dentry 11 * 214 = 2354 11 * 182 = 2002 11 * 2 = 22
* filename 8 * 214 = 1712 8 * 182 = 1456 8 * 2 = 16
* total 4096 3488 40
*
* Note: there are more reserved space in inline dentry than in regular
* dentry, when converting inline dentry we should handle this carefully.
......@@ -506,12 +507,13 @@ typedef __le32 f2fs_hash_t;
#define SIZE_OF_RESERVED (PAGE_SIZE - ((SIZE_OF_DIR_ENTRY + \
F2FS_SLOT_LEN) * \
NR_DENTRY_IN_BLOCK + SIZE_OF_DENTRY_BITMAP))
#define MIN_INLINE_DENTRY_SIZE 40 /* just include '.' and '..' entries */
/* One directory entry slot representing F2FS_SLOT_LEN-sized file name */
struct f2fs_dir_entry {
__le32 hash_code; /* hash code of file name */
__le32 ino; /* inode number */
__le16 name_len; /* lengh of file name */
__le16 name_len; /* length of file name */
__u8 file_type; /* file type */
} __packed;
......
......@@ -149,6 +149,17 @@ TRACE_DEFINE_ENUM(CP_TRIMMED);
{ CP_SPEC_LOG_NUM, "log type is 2" }, \
{ CP_RECOVER_DIR, "dir needs recovery" })
#define show_shutdown_mode(type) \
__print_symbolic(type, \
{ F2FS_GOING_DOWN_FULLSYNC, "full sync" }, \
{ F2FS_GOING_DOWN_METASYNC, "meta sync" }, \
{ F2FS_GOING_DOWN_NOSYNC, "no sync" }, \
{ F2FS_GOING_DOWN_METAFLUSH, "meta flush" }, \
{ F2FS_GOING_DOWN_NEED_FSCK, "need fsck" })
struct f2fs_sb_info;
struct f2fs_io_info;
struct extent_info;
struct victim_sel_policy;
struct f2fs_map_blocks;
......@@ -533,6 +544,9 @@ TRACE_EVENT(f2fs_map_blocks,
__field(block_t, m_lblk)
__field(block_t, m_pblk)
__field(unsigned int, m_len)
__field(unsigned int, m_flags)
__field(int, m_seg_type)
__field(bool, m_may_create)
__field(int, ret)
),
......@@ -542,15 +556,22 @@ TRACE_EVENT(f2fs_map_blocks,
__entry->m_lblk = map->m_lblk;
__entry->m_pblk = map->m_pblk;
__entry->m_len = map->m_len;
__entry->m_flags = map->m_flags;
__entry->m_seg_type = map->m_seg_type;
__entry->m_may_create = map->m_may_create;
__entry->ret = ret;
),
TP_printk("dev = (%d,%d), ino = %lu, file offset = %llu, "
"start blkaddr = 0x%llx, len = 0x%llx, err = %d",
"start blkaddr = 0x%llx, len = 0x%llx, flags = %u,"
"seg_type = %d, may_create = %d, err = %d",
show_dev_ino(__entry),
(unsigned long long)__entry->m_lblk,
(unsigned long long)__entry->m_pblk,
(unsigned long long)__entry->m_len,
__entry->m_flags,
__entry->m_seg_type,
__entry->m_may_create,
__entry->ret)
);
......@@ -1616,6 +1637,30 @@ DEFINE_EVENT(f2fs_sync_dirty_inodes, f2fs_sync_dirty_inodes_exit,
TP_ARGS(sb, type, count)
);
TRACE_EVENT(f2fs_shutdown,
TP_PROTO(struct f2fs_sb_info *sbi, unsigned int mode, int ret),
TP_ARGS(sbi, mode, ret),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(unsigned int, mode)
__field(int, ret)
),
TP_fast_assign(
__entry->dev = sbi->sb->s_dev;
__entry->mode = mode;
__entry->ret = ret;
),
TP_printk("dev = (%d,%d), mode: %s, ret:%d",
show_dev(__entry->dev),
show_shutdown_mode(__entry->mode),
__entry->ret)
);
#endif /* _TRACE_F2FS_H */
/* This part must be outside protection */
......