提交 f01c30de 编写于 作者: L Linus Torvalds

Merge tag 'vfs-5.10-fixes-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux

Pull fs freeze fix and cleanups from Darrick Wong:
 "A single vfs fix for 5.10, along with two subsequent cleanups.

  A very long time ago, a hack was added to the vfs fs freeze protection
  code to work around lockdep complaints about XFS, which would try to
  run a transaction (which requires intwrite protection) to finalize an
  xfs freeze (by which time the vfs had already taken intwrite).

  Fast forward a few years, and XFS fixed the recursive intwrite problem
  on its own, and the hack became unnecessary. Fast forward almost a
  decade, and latent bugs in the code converting this hack from freeze
  flags to freeze locks combine with lockdep bugs to make this reproduce
  frequently enough to notice page faults racing with freeze.

  Since the hack is unnecessary and causes thread race errors, just get
  rid of it completely. Making this kind of vfs change midway through a
  cycle makes me nervous, but a large enough number of the usual
  VFS/ext4/XFS/btrfs suspects have said this looks good and solves a
  real problem vector.

  And once that removal is done, __sb_start_write is now simple enough
  that it becomes possible to refactor the function into smaller,
  simpler static inline helpers in linux/fs.h. The cleanup is
  straightforward.

  Summary:

   - Finally remove the "convert to trylock" weirdness in the fs freezer
     code. It was necessary 10 years ago to deal with nested
     transactions in XFS, but we've long since removed that; and now
     this is causing subtle race conditions when lockdep goes offline
     and sb_start_* aren't prepared to retry a trylock failure.

   - Minor cleanups of the sb_start_* fs freeze helpers"

* tag 'vfs-5.10-fixes-2' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux:
  vfs: move __sb_{start,end}_write* to fs.h
  vfs: separate __sb_start_write into blocking and non-blocking helpers
  vfs: remove lockdep bogosity in __sb_start_write
......@@ -1572,7 +1572,7 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb,
* we return to userspace.
*/
if (S_ISREG(file_inode(file)->i_mode)) {
__sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, true);
sb_start_write(file_inode(file)->i_sb);
__sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE);
}
req->ki_flags |= IOCB_WRITE;
......
......@@ -3547,8 +3547,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
* we return to userspace.
*/
if (req->flags & REQ_F_ISREG) {
__sb_start_write(file_inode(req->file)->i_sb,
SB_FREEZE_WRITE, true);
sb_start_write(file_inode(req->file)->i_sb);
__sb_writers_release(file_inode(req->file)->i_sb,
SB_FREEZE_WRITE);
}
......
......@@ -1631,55 +1631,6 @@ int super_setup_bdi(struct super_block *sb)
}
EXPORT_SYMBOL(super_setup_bdi);
/*
* This is an internal function, please use sb_end_{write,pagefault,intwrite}
* instead.
*/
void __sb_end_write(struct super_block *sb, int level)
{
percpu_up_read(sb->s_writers.rw_sem + level-1);
}
EXPORT_SYMBOL(__sb_end_write);
/*
* This is an internal function, please use sb_start_{write,pagefault,intwrite}
* instead.
*/
int __sb_start_write(struct super_block *sb, int level, bool wait)
{
bool force_trylock = false;
int ret = 1;
#ifdef CONFIG_LOCKDEP
/*
* We want lockdep to tell us about possible deadlocks with freezing
* but it's it bit tricky to properly instrument it. Getting a freeze
* protection works as getting a read lock but there are subtle
* problems. XFS for example gets freeze protection on internal level
* twice in some cases, which is OK only because we already hold a
* freeze protection also on higher level. Due to these cases we have
* to use wait == F (trylock mode) which must not fail.
*/
if (wait) {
int i;
for (i = 0; i < level - 1; i++)
if (percpu_rwsem_is_held(sb->s_writers.rw_sem + i)) {
force_trylock = true;
break;
}
}
#endif
if (wait && !force_trylock)
percpu_down_read(sb->s_writers.rw_sem + level-1);
else
ret = percpu_down_read_trylock(sb->s_writers.rw_sem + level-1);
WARN_ON(force_trylock && !ret);
return ret;
}
EXPORT_SYMBOL(__sb_start_write);
/**
* sb_wait_write - wait until all writers to given file system finish
* @sb: the super for which we wait
......
......@@ -1580,8 +1580,24 @@ extern struct timespec64 current_time(struct inode *inode);
* Snapshotting support.
*/
void __sb_end_write(struct super_block *sb, int level);
int __sb_start_write(struct super_block *sb, int level, bool wait);
/*
* These are internal functions, please use sb_start_{write,pagefault,intwrite}
* instead.
*/
static inline void __sb_end_write(struct super_block *sb, int level)
{
percpu_up_read(sb->s_writers.rw_sem + level-1);
}
static inline void __sb_start_write(struct super_block *sb, int level)
{
percpu_down_read(sb->s_writers.rw_sem + level - 1);
}
static inline bool __sb_start_write_trylock(struct super_block *sb, int level)
{
return percpu_down_read_trylock(sb->s_writers.rw_sem + level - 1);
}
#define __sb_writers_acquired(sb, lev) \
percpu_rwsem_acquire(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_)
......@@ -1645,12 +1661,12 @@ static inline void sb_end_intwrite(struct super_block *sb)
*/
static inline void sb_start_write(struct super_block *sb)
{
__sb_start_write(sb, SB_FREEZE_WRITE, true);
__sb_start_write(sb, SB_FREEZE_WRITE);
}
static inline int sb_start_write_trylock(struct super_block *sb)
static inline bool sb_start_write_trylock(struct super_block *sb)
{
return __sb_start_write(sb, SB_FREEZE_WRITE, false);
return __sb_start_write_trylock(sb, SB_FREEZE_WRITE);
}
/**
......@@ -1674,7 +1690,7 @@ static inline int sb_start_write_trylock(struct super_block *sb)
*/
static inline void sb_start_pagefault(struct super_block *sb)
{
__sb_start_write(sb, SB_FREEZE_PAGEFAULT, true);
__sb_start_write(sb, SB_FREEZE_PAGEFAULT);
}
/*
......@@ -1692,12 +1708,12 @@ static inline void sb_start_pagefault(struct super_block *sb)
*/
static inline void sb_start_intwrite(struct super_block *sb)
{
__sb_start_write(sb, SB_FREEZE_FS, true);
__sb_start_write(sb, SB_FREEZE_FS);
}
static inline int sb_start_intwrite_trylock(struct super_block *sb)
static inline bool sb_start_intwrite_trylock(struct super_block *sb)
{
return __sb_start_write(sb, SB_FREEZE_FS, false);
return __sb_start_write_trylock(sb, SB_FREEZE_FS);
}
......@@ -2756,14 +2772,14 @@ static inline void file_start_write(struct file *file)
{
if (!S_ISREG(file_inode(file)->i_mode))
return;
__sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, true);
sb_start_write(file_inode(file)->i_sb);
}
static inline bool file_start_write_trylock(struct file *file)
{
if (!S_ISREG(file_inode(file)->i_mode))
return true;
return __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, false);
return sb_start_write_trylock(file_inode(file)->i_sb);
}
static inline void file_end_write(struct file *file)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册