提交 75b96f0e 编写于 作者: L Linus Torvalds

Merge tag 'fuse-update-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse

Pull fuse updates from Miklos Szeredi:

 - Allow mounting an active fuse device. Previously the fuse device
   would always be mounted during initialization, and sharing a fuse
   superblock was only possible through mount or namespace cloning

 - Fix data flushing in syncfs (virtiofs only)

 - Fix data flushing in copy_file_range()

 - Fix a possible deadlock in atomic O_TRUNC

 - Misc fixes and cleanups

* tag 'fuse-update-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/fuse:
  fuse: remove unused arg in fuse_write_file_get()
  fuse: wait for writepages in syncfs
  fuse: flush extending writes
  fuse: truncate pagecache on atomic_o_trunc
  fuse: allow sharing existing sb
  fuse: move fget() to fuse_get_tree()
  fuse: move option checking into fuse_fill_super()
  fuse: name fs_context consistently
  fuse: fix use after free in fuse_read_interrupt()
......@@ -328,7 +328,7 @@ void fuse_ctl_remove_conn(struct fuse_conn *fc)
drop_nlink(d_inode(fuse_control_sb->s_root));
}
static int fuse_ctl_fill_super(struct super_block *sb, struct fs_context *fctx)
static int fuse_ctl_fill_super(struct super_block *sb, struct fs_context *fsc)
{
static const struct tree_descr empty_descr = {""};
struct fuse_conn *fc;
......@@ -354,18 +354,18 @@ static int fuse_ctl_fill_super(struct super_block *sb, struct fs_context *fctx)
return 0;
}
static int fuse_ctl_get_tree(struct fs_context *fc)
static int fuse_ctl_get_tree(struct fs_context *fsc)
{
return get_tree_single(fc, fuse_ctl_fill_super);
return get_tree_single(fsc, fuse_ctl_fill_super);
}
static const struct fs_context_operations fuse_ctl_context_ops = {
.get_tree = fuse_ctl_get_tree,
};
static int fuse_ctl_init_fs_context(struct fs_context *fc)
static int fuse_ctl_init_fs_context(struct fs_context *fsc)
{
fc->ops = &fuse_ctl_context_ops;
fsc->ops = &fuse_ctl_context_ops;
return 0;
}
......
......@@ -288,10 +288,10 @@ void fuse_request_end(struct fuse_req *req)
/*
* test_and_set_bit() implies smp_mb() between bit
* changing and below intr_entry check. Pairs with
* changing and below FR_INTERRUPTED check. Pairs with
* smp_mb() from queue_interrupt().
*/
if (!list_empty(&req->intr_entry)) {
if (test_bit(FR_INTERRUPTED, &req->flags)) {
spin_lock(&fiq->lock);
list_del_init(&req->intr_entry);
spin_unlock(&fiq->lock);
......
......@@ -198,12 +198,11 @@ void fuse_finish_open(struct inode *inode, struct file *file)
struct fuse_file *ff = file->private_data;
struct fuse_conn *fc = get_fuse_conn(inode);
if (!(ff->open_flags & FOPEN_KEEP_CACHE))
invalidate_inode_pages2(inode->i_mapping);
if (ff->open_flags & FOPEN_STREAM)
stream_open(inode, file);
else if (ff->open_flags & FOPEN_NONSEEKABLE)
nonseekable_open(inode, file);
if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) {
struct fuse_inode *fi = get_fuse_inode(inode);
......@@ -211,10 +210,14 @@ void fuse_finish_open(struct inode *inode, struct file *file)
fi->attr_version = atomic64_inc_return(&fc->attr_version);
i_size_write(inode, 0);
spin_unlock(&fi->lock);
truncate_pagecache(inode, 0);
fuse_invalidate_attr(inode);
if (fc->writeback_cache)
file_update_time(file);
} else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) {
invalidate_inode_pages2(inode->i_mapping);
}
if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache)
fuse_link_write_file(file);
}
......@@ -389,6 +392,7 @@ struct fuse_writepage_args {
struct list_head queue_entry;
struct fuse_writepage_args *next;
struct inode *inode;
struct fuse_sync_bucket *bucket;
};
static struct fuse_writepage_args *fuse_find_writeback(struct fuse_inode *fi,
......@@ -1608,6 +1612,9 @@ static void fuse_writepage_free(struct fuse_writepage_args *wpa)
struct fuse_args_pages *ap = &wpa->ia.ap;
int i;
if (wpa->bucket)
fuse_sync_bucket_dec(wpa->bucket);
for (i = 0; i < ap->num_pages; i++)
__free_page(ap->pages[i]);
......@@ -1813,8 +1820,7 @@ static void fuse_writepage_end(struct fuse_mount *fm, struct fuse_args *args,
fuse_writepage_free(wpa);
}
static struct fuse_file *__fuse_write_file_get(struct fuse_conn *fc,
struct fuse_inode *fi)
static struct fuse_file *__fuse_write_file_get(struct fuse_inode *fi)
{
struct fuse_file *ff = NULL;
......@@ -1829,22 +1835,20 @@ static struct fuse_file *__fuse_write_file_get(struct fuse_conn *fc,
return ff;
}
static struct fuse_file *fuse_write_file_get(struct fuse_conn *fc,
struct fuse_inode *fi)
static struct fuse_file *fuse_write_file_get(struct fuse_inode *fi)
{
struct fuse_file *ff = __fuse_write_file_get(fc, fi);
struct fuse_file *ff = __fuse_write_file_get(fi);
WARN_ON(!ff);
return ff;
}
int fuse_write_inode(struct inode *inode, struct writeback_control *wbc)
{
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_file *ff;
int err;
ff = __fuse_write_file_get(fc, fi);
ff = __fuse_write_file_get(fi);
err = fuse_flush_times(inode, ff);
if (ff)
fuse_file_put(ff, false, false);
......@@ -1871,6 +1875,20 @@ static struct fuse_writepage_args *fuse_writepage_args_alloc(void)
}
static void fuse_writepage_add_to_bucket(struct fuse_conn *fc,
struct fuse_writepage_args *wpa)
{
if (!fc->sync_fs)
return;
rcu_read_lock();
/* Prevent resurrection of dead bucket in unlikely race with syncfs */
do {
wpa->bucket = rcu_dereference(fc->curr_bucket);
} while (unlikely(!atomic_inc_not_zero(&wpa->bucket->count)));
rcu_read_unlock();
}
static int fuse_writepage_locked(struct page *page)
{
struct address_space *mapping = page->mapping;
......@@ -1894,10 +1912,11 @@ static int fuse_writepage_locked(struct page *page)
goto err_free;
error = -EIO;
wpa->ia.ff = fuse_write_file_get(fc, fi);
wpa->ia.ff = fuse_write_file_get(fi);
if (!wpa->ia.ff)
goto err_nofile;
fuse_writepage_add_to_bucket(fc, wpa);
fuse_write_args_fill(&wpa->ia, wpa->ia.ff, page_offset(page), 0);
copy_highpage(tmp_page, page);
......@@ -2113,7 +2132,7 @@ static int fuse_writepages_fill(struct page *page,
if (!data->ff) {
err = -EIO;
data->ff = fuse_write_file_get(fc, fi);
data->ff = fuse_write_file_get(fi);
if (!data->ff)
goto out_unlock;
}
......@@ -2148,6 +2167,8 @@ static int fuse_writepages_fill(struct page *page,
__free_page(tmp_page);
goto out_unlock;
}
fuse_writepage_add_to_bucket(fc, wpa);
data->max_pages = 1;
ap = &wpa->ia.ap;
......@@ -2881,7 +2902,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
static int fuse_writeback_range(struct inode *inode, loff_t start, loff_t end)
{
int err = filemap_write_and_wait_range(inode->i_mapping, start, end);
int err = filemap_write_and_wait_range(inode->i_mapping, start, -1);
if (!err)
fuse_sync_writes(inode);
......
......@@ -482,6 +482,7 @@ struct fuse_dev {
struct fuse_fs_context {
int fd;
struct file *file;
unsigned int rootmode;
kuid_t user_id;
kgid_t group_id;
......@@ -508,6 +509,13 @@ struct fuse_fs_context {
void **fudptr;
};
struct fuse_sync_bucket {
/* count is a possible scalability bottleneck */
atomic_t count;
wait_queue_head_t waitq;
struct rcu_head rcu;
};
/**
* A Fuse connection.
*
......@@ -800,6 +808,9 @@ struct fuse_conn {
/** List of filesystems using this connection */
struct list_head mounts;
/* New writepages go into this bucket */
struct fuse_sync_bucket __rcu *curr_bucket;
};
/*
......@@ -903,6 +914,15 @@ static inline void fuse_page_descs_length_init(struct fuse_page_desc *descs,
descs[i].length = PAGE_SIZE - descs[i].offset;
}
static inline void fuse_sync_bucket_dec(struct fuse_sync_bucket *bucket)
{
/* Need RCU protection to prevent use after free after the decrement */
rcu_read_lock();
if (atomic_dec_and_test(&bucket->count))
wake_up(&bucket->waitq);
rcu_read_unlock();
}
/** Device operations */
extern const struct file_operations fuse_dev_operations;
......
......@@ -137,12 +137,12 @@ static void fuse_evict_inode(struct inode *inode)
}
}
static int fuse_reconfigure(struct fs_context *fc)
static int fuse_reconfigure(struct fs_context *fsc)
{
struct super_block *sb = fc->root->d_sb;
struct super_block *sb = fsc->root->d_sb;
sync_filesystem(sb);
if (fc->sb_flags & SB_MANDLOCK)
if (fsc->sb_flags & SB_MANDLOCK)
return -EINVAL;
return 0;
......@@ -505,6 +505,57 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
return err;
}
static struct fuse_sync_bucket *fuse_sync_bucket_alloc(void)
{
struct fuse_sync_bucket *bucket;
bucket = kzalloc(sizeof(*bucket), GFP_KERNEL | __GFP_NOFAIL);
if (bucket) {
init_waitqueue_head(&bucket->waitq);
/* Initial active count */
atomic_set(&bucket->count, 1);
}
return bucket;
}
static void fuse_sync_fs_writes(struct fuse_conn *fc)
{
struct fuse_sync_bucket *bucket, *new_bucket;
int count;
new_bucket = fuse_sync_bucket_alloc();
spin_lock(&fc->lock);
bucket = rcu_dereference_protected(fc->curr_bucket, 1);
count = atomic_read(&bucket->count);
WARN_ON(count < 1);
/* No outstanding writes? */
if (count == 1) {
spin_unlock(&fc->lock);
kfree(new_bucket);
return;
}
/*
* Completion of new bucket depends on completion of this bucket, so add
* one more count.
*/
atomic_inc(&new_bucket->count);
rcu_assign_pointer(fc->curr_bucket, new_bucket);
spin_unlock(&fc->lock);
/*
* Drop initial active count. At this point if all writes in this and
* ancestor buckets complete, the count will go to zero and this task
* will be woken up.
*/
atomic_dec(&bucket->count);
wait_event(bucket->waitq, atomic_read(&bucket->count) == 0);
/* Drop temp count on descendant bucket */
fuse_sync_bucket_dec(new_bucket);
kfree_rcu(bucket, rcu);
}
static int fuse_sync_fs(struct super_block *sb, int wait)
{
struct fuse_mount *fm = get_fuse_mount_super(sb);
......@@ -527,6 +578,8 @@ static int fuse_sync_fs(struct super_block *sb, int wait)
if (!fc->sync_fs)
return 0;
fuse_sync_fs_writes(fc);
memset(&inarg, 0, sizeof(inarg));
args.in_numargs = 1;
args.in_args[0].size = sizeof(inarg);
......@@ -572,38 +625,38 @@ static const struct fs_parameter_spec fuse_fs_parameters[] = {
{}
};
static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param)
static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param)
{
struct fs_parse_result result;
struct fuse_fs_context *ctx = fc->fs_private;
struct fuse_fs_context *ctx = fsc->fs_private;
int opt;
if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
if (fsc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
/*
* Ignore options coming from mount(MS_REMOUNT) for backward
* compatibility.
*/
if (fc->oldapi)
if (fsc->oldapi)
return 0;
return invalfc(fc, "No changes allowed in reconfigure");
return invalfc(fsc, "No changes allowed in reconfigure");
}
opt = fs_parse(fc, fuse_fs_parameters, param, &result);
opt = fs_parse(fsc, fuse_fs_parameters, param, &result);
if (opt < 0)
return opt;
switch (opt) {
case OPT_SOURCE:
if (fc->source)
return invalfc(fc, "Multiple sources specified");
fc->source = param->string;
if (fsc->source)
return invalfc(fsc, "Multiple sources specified");
fsc->source = param->string;
param->string = NULL;
break;
case OPT_SUBTYPE:
if (ctx->subtype)
return invalfc(fc, "Multiple subtypes specified");
return invalfc(fsc, "Multiple subtypes specified");
ctx->subtype = param->string;
param->string = NULL;
return 0;
......@@ -615,22 +668,22 @@ static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param)
case OPT_ROOTMODE:
if (!fuse_valid_type(result.uint_32))
return invalfc(fc, "Invalid rootmode");
return invalfc(fsc, "Invalid rootmode");
ctx->rootmode = result.uint_32;
ctx->rootmode_present = true;
break;
case OPT_USER_ID:
ctx->user_id = make_kuid(fc->user_ns, result.uint_32);
ctx->user_id = make_kuid(fsc->user_ns, result.uint_32);
if (!uid_valid(ctx->user_id))
return invalfc(fc, "Invalid user_id");
return invalfc(fsc, "Invalid user_id");
ctx->user_id_present = true;
break;
case OPT_GROUP_ID:
ctx->group_id = make_kgid(fc->user_ns, result.uint_32);
ctx->group_id = make_kgid(fsc->user_ns, result.uint_32);
if (!gid_valid(ctx->group_id))
return invalfc(fc, "Invalid group_id");
return invalfc(fsc, "Invalid group_id");
ctx->group_id_present = true;
break;
......@@ -648,7 +701,7 @@ static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param)
case OPT_BLKSIZE:
if (!ctx->is_bdev)
return invalfc(fc, "blksize only supported for fuseblk");
return invalfc(fsc, "blksize only supported for fuseblk");
ctx->blksize = result.uint_32;
break;
......@@ -659,9 +712,9 @@ static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param)
return 0;
}
static void fuse_free_fc(struct fs_context *fc)
static void fuse_free_fsc(struct fs_context *fsc)
{
struct fuse_fs_context *ctx = fc->fs_private;
struct fuse_fs_context *ctx = fsc->fs_private;
if (ctx) {
kfree(ctx->subtype);
......@@ -762,6 +815,7 @@ void fuse_conn_put(struct fuse_conn *fc)
{
if (refcount_dec_and_test(&fc->count)) {
struct fuse_iqueue *fiq = &fc->iq;
struct fuse_sync_bucket *bucket;
if (IS_ENABLED(CONFIG_FUSE_DAX))
fuse_dax_conn_free(fc);
......@@ -769,6 +823,11 @@ void fuse_conn_put(struct fuse_conn *fc)
fiq->ops->release(fiq);
put_pid_ns(fc->pid_ns);
put_user_ns(fc->user_ns);
bucket = rcu_dereference_protected(fc->curr_bucket, 1);
if (bucket) {
WARN_ON(atomic_read(&bucket->count) != 1);
kfree(bucket);
}
fc->release(fc);
}
}
......@@ -1417,6 +1476,7 @@ int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
if (sb->s_flags & SB_MANDLOCK)
goto err;
rcu_assign_pointer(fc->curr_bucket, fuse_sync_bucket_alloc());
fuse_sb_defaults(sb);
if (ctx->is_bdev) {
......@@ -1508,34 +1568,33 @@ EXPORT_SYMBOL_GPL(fuse_fill_super_common);
static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
{
struct fuse_fs_context *ctx = fsc->fs_private;
struct file *file;
int err;
struct fuse_conn *fc;
struct fuse_mount *fm;
err = -EINVAL;
file = fget(ctx->fd);
if (!file)
goto err;
if (!ctx->file || !ctx->rootmode_present ||
!ctx->user_id_present || !ctx->group_id_present)
return -EINVAL;
/*
* Require mount to happen from the same user namespace which
* opened /dev/fuse to prevent potential attacks.
*/
if ((file->f_op != &fuse_dev_operations) ||
(file->f_cred->user_ns != sb->s_user_ns))
goto err_fput;
ctx->fudptr = &file->private_data;
err = -EINVAL;
if ((ctx->file->f_op != &fuse_dev_operations) ||
(ctx->file->f_cred->user_ns != sb->s_user_ns))
goto err;
ctx->fudptr = &ctx->file->private_data;
fc = kmalloc(sizeof(*fc), GFP_KERNEL);
err = -ENOMEM;
if (!fc)
goto err_fput;
goto err;
fm = kzalloc(sizeof(*fm), GFP_KERNEL);
if (!fm) {
kfree(fc);
goto err_fput;
goto err;
}
fuse_conn_init(fc, fm, sb->s_user_ns, &fuse_dev_fiq_ops, NULL);
......@@ -1546,12 +1605,8 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
err = fuse_fill_super_common(sb, ctx);
if (err)
goto err_put_conn;
/*
* atomic_dec_and_test() in fput() provides the necessary
* memory barrier for file->private_data to be visible on all
* CPUs after this
*/
fput(file);
/* file->private_data shall be visible on all CPUs after this */
smp_mb();
fuse_send_init(get_fuse_mount_super(sb));
return 0;
......@@ -1559,30 +1614,68 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
fuse_conn_put(fc);
kfree(fm);
sb->s_fs_info = NULL;
err_fput:
fput(file);
err:
return err;
}
static int fuse_get_tree(struct fs_context *fc)
/*
* This is the path where user supplied an already initialized fuse dev. In
* this case never create a new super if the old one is gone.
*/
static int fuse_set_no_super(struct super_block *sb, struct fs_context *fsc)
{
struct fuse_fs_context *ctx = fc->fs_private;
return -ENOTCONN;
}
if (!ctx->fd_present || !ctx->rootmode_present ||
!ctx->user_id_present || !ctx->group_id_present)
return -EINVAL;
static int fuse_test_super(struct super_block *sb, struct fs_context *fsc)
{
#ifdef CONFIG_BLOCK
if (ctx->is_bdev)
return get_tree_bdev(fc, fuse_fill_super);
#endif
return fsc->sget_key == get_fuse_conn_super(sb);
}
static int fuse_get_tree(struct fs_context *fsc)
{
struct fuse_fs_context *ctx = fsc->fs_private;
struct fuse_dev *fud;
struct super_block *sb;
int err;
return get_tree_nodev(fc, fuse_fill_super);
if (ctx->fd_present)
ctx->file = fget(ctx->fd);
if (IS_ENABLED(CONFIG_BLOCK) && ctx->is_bdev) {
err = get_tree_bdev(fsc, fuse_fill_super);
goto out_fput;
}
/*
* While block dev mount can be initialized with a dummy device fd
* (found by device name), normal fuse mounts can't
*/
if (!ctx->file)
return -EINVAL;
/*
* Allow creating a fuse mount with an already initialized fuse
* connection
*/
fud = READ_ONCE(ctx->file->private_data);
if (ctx->file->f_op == &fuse_dev_operations && fud) {
fsc->sget_key = fud->fc;
sb = sget_fc(fsc, fuse_test_super, fuse_set_no_super);
err = PTR_ERR_OR_ZERO(sb);
if (!IS_ERR(sb))
fsc->root = dget(sb->s_root);
} else {
err = get_tree_nodev(fsc, fuse_fill_super);
}
out_fput:
if (ctx->file)
fput(ctx->file);
return err;
}
static const struct fs_context_operations fuse_context_ops = {
.free = fuse_free_fc,
.free = fuse_free_fsc,
.parse_param = fuse_parse_param,
.reconfigure = fuse_reconfigure,
.get_tree = fuse_get_tree,
......@@ -1591,7 +1684,7 @@ static const struct fs_context_operations fuse_context_ops = {
/*
* Set up the filesystem mount context.
*/
static int fuse_init_fs_context(struct fs_context *fc)
static int fuse_init_fs_context(struct fs_context *fsc)
{
struct fuse_fs_context *ctx;
......@@ -1604,14 +1697,14 @@ static int fuse_init_fs_context(struct fs_context *fc)
ctx->legacy_opts_show = true;
#ifdef CONFIG_BLOCK
if (fc->fs_type == &fuseblk_fs_type) {
if (fsc->fs_type == &fuseblk_fs_type) {
ctx->is_bdev = true;
ctx->destroy = true;
}
#endif
fc->fs_private = ctx;
fc->ops = &fuse_context_ops;
fsc->fs_private = ctx;
fsc->ops = &fuse_context_ops;
return 0;
}
......
......@@ -97,14 +97,14 @@ static const struct fs_parameter_spec virtio_fs_parameters[] = {
{}
};
static int virtio_fs_parse_param(struct fs_context *fc,
static int virtio_fs_parse_param(struct fs_context *fsc,
struct fs_parameter *param)
{
struct fs_parse_result result;
struct fuse_fs_context *ctx = fc->fs_private;
struct fuse_fs_context *ctx = fsc->fs_private;
int opt;
opt = fs_parse(fc, virtio_fs_parameters, param, &result);
opt = fs_parse(fsc, virtio_fs_parameters, param, &result);
if (opt < 0)
return opt;
......@@ -119,9 +119,9 @@ static int virtio_fs_parse_param(struct fs_context *fc,
return 0;
}
static void virtio_fs_free_fc(struct fs_context *fc)
static void virtio_fs_free_fsc(struct fs_context *fsc)
{
struct fuse_fs_context *ctx = fc->fs_private;
struct fuse_fs_context *ctx = fsc->fs_private;
kfree(ctx);
}
......@@ -1488,7 +1488,7 @@ static int virtio_fs_get_tree(struct fs_context *fsc)
}
static const struct fs_context_operations virtio_fs_context_ops = {
.free = virtio_fs_free_fc,
.free = virtio_fs_free_fsc,
.parse_param = virtio_fs_parse_param,
.get_tree = virtio_fs_get_tree,
};
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册