inode.c 45.3 KB
Newer Older
M
Miklos Szeredi 已提交
1 2
/*
  FUSE: Filesystem in Userspace
M
Miklos Szeredi 已提交
3
  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
M
Miklos Szeredi 已提交
4 5 6 7 8 9 10 11 12 13 14 15 16

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/

#include "fuse_i.h"

#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/module.h>
17
#include <linux/moduleparam.h>
18 19
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
M
Miklos Szeredi 已提交
20
#include <linux/statfs.h>
21
#include <linux/random.h>
A
Alexey Dobriyan 已提交
22
#include <linux/sched.h>
M
Miklos Szeredi 已提交
23
#include <linux/exportfs.h>
S
Seth Forshee 已提交
24
#include <linux/posix_acl.h>
25
#include <linux/pid_namespace.h>
M
Miklos Szeredi 已提交
26 27 28 29 30

MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
MODULE_DESCRIPTION("Filesystem in Userspace");
MODULE_LICENSE("GPL");

31
static struct kmem_cache *fuse_inode_cachep;
32 33
struct list_head fuse_conn_list;
DEFINE_MUTEX(fuse_mutex);
M
Miklos Szeredi 已提交
34

35
static int set_global_limit(const char *val, const struct kernel_param *kp);
36

37
unsigned max_user_bgreq;
38 39 40 41 42 43 44
module_param_call(max_user_bgreq, set_global_limit, param_get_uint,
		  &max_user_bgreq, 0644);
__MODULE_PARM_TYPE(max_user_bgreq, "uint");
MODULE_PARM_DESC(max_user_bgreq,
 "Global limit for the maximum number of backgrounded requests an "
 "unprivileged user can set");

45
unsigned max_user_congthresh;
46 47 48 49 50 51 52
module_param_call(max_user_congthresh, set_global_limit, param_get_uint,
		  &max_user_congthresh, 0644);
__MODULE_PARM_TYPE(max_user_congthresh, "uint");
MODULE_PARM_DESC(max_user_congthresh,
 "Global limit for the maximum congestion threshold an "
 "unprivileged user can set");

M
Miklos Szeredi 已提交
53 54
#define FUSE_SUPER_MAGIC 0x65735546

M
Miklos Szeredi 已提交
55 56
#define FUSE_DEFAULT_BLKSIZE 512

57 58 59 60 61 62
/** Maximum number of outstanding background requests */
#define FUSE_DEFAULT_MAX_BACKGROUND 12

/** Congestion starts at 75% of maximum */
#define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4)

63 64 65 66
#ifdef CONFIG_BLOCK
static struct file_system_type fuseblk_fs_type;
#endif

67
struct fuse_forget_link *fuse_alloc_forget(void)
68
{
69
	return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL_ACCOUNT);
70 71
}

M
Miklos Szeredi 已提交
72 73 74 75
static struct inode *fuse_alloc_inode(struct super_block *sb)
{
	struct fuse_inode *fi;

Z
zhangliguang 已提交
76 77
	fi = kmem_cache_alloc(fuse_inode_cachep, GFP_KERNEL);
	if (!fi)
M
Miklos Szeredi 已提交
78 79
		return NULL;

M
Miklos Szeredi 已提交
80
	fi->i_time = 0;
81
	fi->inval_mask = 0;
M
Miklos Szeredi 已提交
82
	fi->nodeid = 0;
83
	fi->nlookup = 0;
84
	fi->attr_version = 0;
85
	fi->orig_ino = 0;
86
	fi->state = 0;
87
	mutex_init(&fi->mutex);
88
	spin_lock_init(&fi->lock);
89
	fi->forget = fuse_alloc_forget();
90 91 92 93 94
	if (!fi->forget)
		goto out_free;

	if (IS_ENABLED(CONFIG_FUSE_DAX) && !fuse_dax_inode_alloc(sb, fi))
		goto out_free_forget;
M
Miklos Szeredi 已提交
95

Z
zhangliguang 已提交
96
	return &fi->inode;
97 98 99 100 101 102

out_free_forget:
	kfree(fi->forget);
out_free:
	kmem_cache_free(fuse_inode_cachep, fi);
	return NULL;
M
Miklos Szeredi 已提交
103 104
}

A
Al Viro 已提交
105
static void fuse_free_inode(struct inode *inode)
M
Miklos Szeredi 已提交
106
{
107
	struct fuse_inode *fi = get_fuse_inode(inode);
A
Al Viro 已提交
108

109
	mutex_destroy(&fi->mutex);
110
	kfree(fi->forget);
111 112 113
#ifdef CONFIG_FUSE_DAX
	kfree(fi->dax);
#endif
A
Al Viro 已提交
114
	kmem_cache_free(fuse_inode_cachep, fi);
M
Miklos Szeredi 已提交
115 116
}

117
static void fuse_evict_inode(struct inode *inode)
M
Miklos Szeredi 已提交
118
{
A
Al Viro 已提交
119 120
	struct fuse_inode *fi = get_fuse_inode(inode);

121
	truncate_inode_pages_final(&inode->i_data);
122
	clear_inode(inode);
123
	if (inode->i_sb->s_flags & SB_ACTIVE) {
M
Miklos Szeredi 已提交
124
		struct fuse_conn *fc = get_fuse_conn(inode);
125 126 127

		if (FUSE_IS_DAX(inode))
			fuse_dax_inode_cleanup(inode);
128 129 130 131 132
		if (fi->nlookup) {
			fuse_queue_forget(fc, fi->forget, fi->nodeid,
					  fi->nlookup);
			fi->forget = NULL;
		}
133
	}
M
Miklos Szeredi 已提交
134
	if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) {
A
Al Viro 已提交
135 136 137
		WARN_ON(!list_empty(&fi->write_files));
		WARN_ON(!list_empty(&fi->queued_writes));
	}
M
Miklos Szeredi 已提交
138 139
}

140
static int fuse_reconfigure(struct fs_context *fsc)
141
{
142
	struct super_block *sb = fsc->root->d_sb;
143

144
	sync_filesystem(sb);
145
	if (fsc->sb_flags & SB_MANDLOCK)
146 147 148 149 150
		return -EINVAL;

	return 0;
}

151 152 153 154 155 156 157 158 159 160 161 162
/*
 * Fold a 64-bit inode number into an ino_t.
 *
 * When ino_t is as wide as u64 the value passes through unchanged.  When
 * it is narrower (ino_t is 32 bits on 32-bit architectures) the bits that
 * would not fit are XOR-ed into the low part.
 */
static ino_t fuse_squash_ino(u64 ino64)
{
	ino_t low = (ino_t) ino64;

	if (sizeof(ino_t) >= sizeof(u64))
		return low;

	return low ^ (ino_t) (ino64 >> (sizeof(u64) - sizeof(ino_t)) * 8);
}

M
Miklos Szeredi 已提交
163 164
/*
 * Copy server-supplied attributes from @attr into the VFS inode.
 *
 * The caller must hold fi->lock (asserted via lockdep).  The connection
 * wide attr_version is bumped and stored in the fuse inode together with
 * @attr_valid, and the invalidation mask is cleared.  i_size is not
 * written here.
 */
void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
				   u64 attr_valid)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);

	lockdep_assert_held(&fi->lock);

	fi->attr_version = atomic64_inc_return(&fc->attr_version);
	fi->i_time = attr_valid;
	WRITE_ONCE(fi->inval_mask, 0);

	inode->i_ino     = fuse_squash_ino(attr->ino);
	/* Keep the file type bits, take only the permission bits from @attr */
	inode->i_mode    = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
	set_nlink(inode, attr->nlink);
	inode->i_uid     = make_kuid(fc->user_ns, attr->uid);
	inode->i_gid     = make_kgid(fc->user_ns, attr->gid);
	inode->i_blocks  = attr->blocks;
	inode->i_atime.tv_sec   = attr->atime;
	inode->i_atime.tv_nsec  = attr->atimensec;
	/* mtime from server may be stale due to local buffered write */
	if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) {
		inode->i_mtime.tv_sec   = attr->mtime;
		inode->i_mtime.tv_nsec  = attr->mtimensec;
		inode->i_ctime.tv_sec   = attr->ctime;
		inode->i_ctime.tv_nsec  = attr->ctimensec;
	}

	/* A zero blksize means: fall back to the superblock's block size */
	if (attr->blksize != 0)
		inode->i_blkbits = ilog2(attr->blksize);
	else
		inode->i_blkbits = inode->i_sb->s_blocksize_bits;

	/*
	 * Don't set the sticky bit in i_mode, unless we want the VFS
	 * to check permissions.  This prevents failures due to the
	 * check in may_delete().
	 */
	fi->orig_i_mode = inode->i_mode;
	if (!fc->default_permissions)
		inode->i_mode &= ~S_ISVTX;

	/* Remember the unsquashed 64-bit inode number */
	fi->orig_ino = attr->ino;

	/*
	 * We are refreshing inode data and it is possible that another
	 * client set suid/sgid or security.capability xattr. So clear
	 * S_NOSEC. Ideally, we could have cleared it only if suid/sgid
	 * was set or if security.capability xattr was set. But we don't
	 * know if security.capability has been set or not. So clear it
	 * anyway. It's less efficient but should be safe.
	 */
	inode->i_flags &= ~S_NOSEC;
}

/*
 * Apply freshly received attributes to @inode, including the file size,
 * and invalidate cached pages where the new attributes require it.
 *
 * The update is dropped when @attr_version is non-zero and older than the
 * version already recorded in the inode, or while FUSE_I_SIZE_UNSTABLE is
 * set in fi->state.
 */
void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
			    u64 attr_valid, u64 attr_version)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	bool is_wb = fc->writeback_cache;
	loff_t oldsize;
	struct timespec64 old_mtime;

	spin_lock(&fi->lock);
	if ((attr_version != 0 && fi->attr_version > attr_version) ||
	    test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) {
		spin_unlock(&fi->lock);
		return;
	}

	/* Sample mtime before it is overwritten, for the auto-inval check */
	old_mtime = inode->i_mtime;
	fuse_change_attributes_common(inode, attr, attr_valid);

	oldsize = inode->i_size;
	/*
	 * In case of writeback_cache enabled, the cached writes beyond EOF
	 * extend local i_size without keeping userspace server in sync. So,
	 * attr->size coming from server can be stale. We cannot trust it.
	 */
	if (!is_wb || !S_ISREG(inode->i_mode))
		i_size_write(inode, attr->size);
	spin_unlock(&fi->lock);

	if (!is_wb && S_ISREG(inode->i_mode)) {
		bool inval = false;

		if (oldsize != attr->size) {
			truncate_pagecache(inode, attr->size);
			if (!fc->explicit_inval_data)
				inval = true;
		} else if (fc->auto_inval_data) {
			struct timespec64 new_mtime = {
				.tv_sec = attr->mtime,
				.tv_nsec = attr->mtimensec,
			};

			/*
			 * Auto inval mode also checks and invalidates if mtime
			 * has changed.
			 */
			if (!timespec64_equal(&old_mtime, &new_mtime))
				inval = true;
		}

		if (inval)
			invalidate_inode_pages2(inode->i_mapping);
	}
}

/*
 * First-time setup of a new inode from @attr: file type, size, mtime and
 * ctime, followed by the type-specific initialiser.
 *
 * An attr->mode whose type is none of regular/dir/symlink/char/block/
 * fifo/socket triggers BUG().
 */
static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
{
	/* Only the type bits are taken here */
	inode->i_mode = attr->mode & S_IFMT;
	inode->i_size = attr->size;
	inode->i_mtime.tv_sec  = attr->mtime;
	inode->i_mtime.tv_nsec = attr->mtimensec;
	inode->i_ctime.tv_sec  = attr->ctime;
	inode->i_ctime.tv_nsec = attr->ctimensec;
	if (S_ISREG(inode->i_mode)) {
		fuse_init_common(inode);
		fuse_init_file_inode(inode);
	} else if (S_ISDIR(inode->i_mode))
		fuse_init_dir(inode);
	else if (S_ISLNK(inode->i_mode))
		fuse_init_symlink(inode);
	else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
		 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
		fuse_init_common(inode);
		init_special_inode(inode, inode->i_mode,
				   new_decode_dev(attr->rdev));
	} else
		BUG();
}

297
static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
M
Miklos Szeredi 已提交
298
{
M
Miklos Szeredi 已提交
299
	u64 nodeid = *(u64 *) _nodeidp;
M
Miklos Szeredi 已提交
300 301 302 303 304 305 306 307
	if (get_node_id(inode) == nodeid)
		return 1;
	else
		return 0;
}

static int fuse_inode_set(struct inode *inode, void *_nodeidp)
{
M
Miklos Szeredi 已提交
308
	u64 nodeid = *(u64 *) _nodeidp;
M
Miklos Szeredi 已提交
309 310 311 312
	get_fuse_inode(inode)->nodeid = nodeid;
	return 0;
}

M
Miklos Szeredi 已提交
313
/*
 * Find or create the inode for @nodeid on @sb, take one lookup reference
 * on it (fi->nlookup++) and refresh its attributes from @attr.
 *
 * If a cached inode turns out to be stale (per fuse_stale_inode()), it is
 * marked bad, released, and the lookup is retried.
 *
 * Returns the inode, or NULL if no inode could be obtained.
 */
struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
			int generation, struct fuse_attr *attr,
			u64 attr_valid, u64 attr_version)
{
	struct inode *inode;
	struct fuse_inode *fi;
	struct fuse_conn *fc = get_fuse_conn_super(sb);

	/*
	 * Auto mount points get their node id from the submount root, which is
	 * not a unique identifier within this filesystem.
	 *
	 * To avoid conflicts, do not place submount points into the inode hash
	 * table.
	 */
	if (fc->auto_submounts && (attr->flags & FUSE_ATTR_SUBMOUNT) &&
	    S_ISDIR(attr->mode)) {
		inode = new_inode(sb);
		if (!inode)
			return NULL;

		fuse_init_inode(inode, attr);
		get_fuse_inode(inode)->nodeid = nodeid;
		inode->i_flags |= S_AUTOMOUNT;
		goto done;
	}

retry:
	inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid);
	if (!inode)
		return NULL;

	if ((inode->i_state & I_NEW)) {
		inode->i_flags |= S_NOATIME;
		if (!fc->writeback_cache || !S_ISREG(attr->mode))
			inode->i_flags |= S_NOCMTIME;
		inode->i_generation = generation;
		fuse_init_inode(inode, attr);
		unlock_new_inode(inode);
	} else if (fuse_stale_inode(inode, generation, attr)) {
		/* nodeid was reused, any I/O on the old inode should fail */
		fuse_make_bad(inode);
		iput(inode);
		goto retry;
	}
done:
	fi = get_fuse_inode(inode);
	spin_lock(&fi->lock);
	fi->nlookup++;
	spin_unlock(&fi->lock);
	fuse_change_attributes(inode, attr, attr_valid, attr_version);

	return inode;
}

368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390
/*
 * Search every mount of connection @fc for an inode with node id @nodeid.
 *
 * Warns if fc->killsb is not locked.  Mounts without a superblock are
 * skipped.  On a hit the owning mount is stored through @fm (when @fm is
 * non-NULL) and the inode returned by ilookup5() is handed back.
 *
 * Returns NULL when no mount has a matching inode.
 */
struct inode *fuse_ilookup(struct fuse_conn *fc, u64 nodeid,
			   struct fuse_mount **fm)
{
	struct fuse_mount *cursor;

	WARN_ON(!rwsem_is_locked(&fc->killsb));
	list_for_each_entry(cursor, &fc->mounts, fc_entry) {
		struct inode *found;

		if (cursor->sb == NULL)
			continue;

		found = ilookup5(cursor->sb, nodeid, fuse_inode_eq, &nodeid);
		if (found == NULL)
			continue;

		if (fm != NULL)
			*fm = cursor;
		return found;
	}

	return NULL;
}

/*
 * Invalidate cached state of the inode with node id @nodeid.
 *
 * Attributes and cached ACLs are always invalidated.  If @offset is
 * non-negative, cached pages are invalidated too: from @offset to the end
 * of the file when @len <= 0, otherwise the pages covering
 * [@offset, @offset + @len - 1].
 *
 * Returns 0 on success, -ENOENT if fuse_ilookup() finds no such inode.
 */
int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid,
			     loff_t offset, loff_t len)
{
	struct fuse_inode *fi;
	struct inode *inode;
	pgoff_t pg_start;
	pgoff_t pg_end;

	inode = fuse_ilookup(fc, nodeid, NULL);
	if (!inode)
		return -ENOENT;

	/* Advance the inode's attr_version past any version issued so far */
	fi = get_fuse_inode(inode);
	spin_lock(&fi->lock);
	fi->attr_version = atomic64_inc_return(&fc->attr_version);
	spin_unlock(&fi->lock);

	fuse_invalidate_attr(inode);
	forget_all_cached_acls(inode);
	if (offset >= 0) {
		pg_start = offset >> PAGE_SHIFT;
		if (len <= 0)
			pg_end = -1;
		else
			pg_end = (offset + len - 1) >> PAGE_SHIFT;
		invalidate_inode_pages2_range(inode->i_mapping,
					      pg_start, pg_end);
	}
	/* Drop the reference obtained by fuse_ilookup() */
	iput(inode);
	return 0;
}

422
bool fuse_lock_inode(struct inode *inode)
423
{
424 425 426
	bool locked = false;

	if (!get_fuse_conn(inode)->parallel_dirops) {
427
		mutex_lock(&get_fuse_inode(inode)->mutex);
428 429 430 431
		locked = true;
	}

	return locked;
432 433
}

434
/*
 * Counterpart of fuse_lock_inode(): release the per-inode mutex if
 * @locked says it was taken.
 */
void fuse_unlock_inode(struct inode *inode, bool locked)
{
	if (locked)
		mutex_unlock(&get_fuse_inode(inode)->mutex);
}

440
static void fuse_umount_begin(struct super_block *sb)
441
{
442 443 444 445
	struct fuse_conn *fc = get_fuse_conn_super(sb);

	if (!fc->no_force_umount)
		fuse_abort_conn(fc);
446 447
}

448
static void fuse_send_destroy(struct fuse_mount *fm)
449
{
450
	if (fm->fc->conn_init) {
451 452 453 454 455
		FUSE_ARGS(args);

		args.opcode = FUSE_DESTROY;
		args.force = true;
		args.nocreds = true;
456
		fuse_simple_request(fm, &args);
457 458 459
	}
}

460 461
static void fuse_put_super(struct super_block *sb)
{
462
	struct fuse_mount *fm = get_fuse_mount_super(sb);
463

464 465
	fuse_conn_put(fm->fc);
	kfree(fm);
M
Miklos Szeredi 已提交
466 467
}

468 469 470 471
/*
 * Translate a fuse_kstatfs reply into the VFS kstatfs structure.
 * f_type is always FUSE_SUPER_MAGIC; f_fsid is not written.
 */
static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr)
{
	stbuf->f_type    = FUSE_SUPER_MAGIC;
	stbuf->f_bsize   = attr->bsize;
	stbuf->f_frsize  = attr->frsize;
	stbuf->f_blocks  = attr->blocks;
	stbuf->f_bfree   = attr->bfree;
	stbuf->f_bavail  = attr->bavail;
	stbuf->f_files   = attr->files;
	stbuf->f_ffree   = attr->ffree;
	stbuf->f_namelen = attr->namelen;
	/* fsid is left zero */
}

482
static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
483
{
484
	struct super_block *sb = dentry->d_sb;
485
	struct fuse_mount *fm = get_fuse_mount_super(sb);
486
	FUSE_ARGS(args);
487 488 489
	struct fuse_statfs_out outarg;
	int err;

490
	if (!fuse_allow_current_process(fm->fc)) {
M
Miklos Szeredi 已提交
491 492 493 494
		buf->f_type = FUSE_SUPER_MAGIC;
		return 0;
	}

495
	memset(&outarg, 0, sizeof(outarg));
496 497 498 499 500 501
	args.in_numargs = 0;
	args.opcode = FUSE_STATFS;
	args.nodeid = get_node_id(d_inode(dentry));
	args.out_numargs = 1;
	args.out_args[0].size = sizeof(outarg);
	args.out_args[0].value = &outarg;
502
	err = fuse_simple_request(fm, &args);
503 504 505 506 507
	if (!err)
		convert_fuse_statfs(buf, &outarg.st);
	return err;
}

508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558
static struct fuse_sync_bucket *fuse_sync_bucket_alloc(void)
{
	struct fuse_sync_bucket *bucket;

	bucket = kzalloc(sizeof(*bucket), GFP_KERNEL | __GFP_NOFAIL);
	if (bucket) {
		init_waitqueue_head(&bucket->waitq);
		/* Initial active count */
		atomic_set(&bucket->count, 1);
	}
	return bucket;
}

/*
 * Wait until the count of the connection's current sync bucket drops to
 * zero.
 *
 * A replacement bucket is allocated first and installed as
 * fc->curr_bucket under fc->lock; the old bucket's initial count is then
 * dropped and this task sleeps on the old bucket's wait queue.  If the
 * old bucket holds nothing but its initial count, the replacement is
 * freed and the function returns without waiting.
 */
static void fuse_sync_fs_writes(struct fuse_conn *fc)
{
	struct fuse_sync_bucket *bucket, *new_bucket;
	int count;

	/* Allocate before taking the spinlock */
	new_bucket = fuse_sync_bucket_alloc();
	spin_lock(&fc->lock);
	bucket = rcu_dereference_protected(fc->curr_bucket, 1);
	count = atomic_read(&bucket->count);
	WARN_ON(count < 1);
	/* No outstanding writes? */
	if (count == 1) {
		spin_unlock(&fc->lock);
		kfree(new_bucket);
		return;
	}

	/*
	 * Completion of new bucket depends on completion of this bucket, so add
	 * one more count.
	 */
	atomic_inc(&new_bucket->count);
	rcu_assign_pointer(fc->curr_bucket, new_bucket);
	spin_unlock(&fc->lock);
	/*
	 * Drop initial active count.  At this point if all writes in this and
	 * ancestor buckets complete, the count will go to zero and this task
	 * will be woken up.
	 */
	atomic_dec(&bucket->count);

	wait_event(bucket->waitq, atomic_read(&bucket->count) == 0);

	/* Drop temp count on descendant bucket */
	fuse_sync_bucket_dec(new_bucket);
	/* The old bucket is no longer fc->curr_bucket; free it via RCU */
	kfree_rcu(bucket, rcu);
}

559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580
/*
 * super_operations.sync_fs: wait for the current sync bucket via
 * fuse_sync_fs_writes(), then send FUSE_SYNCFS for the root node.
 *
 * Nothing is done (and 0 is returned) when @wait is zero, when the
 * superblock has no root, or when fc->sync_fs is clear.  A server reply
 * of -ENOSYS clears fc->sync_fs and is reported as success.
 *
 * Returns 0 or the error from fuse_simple_request().
 */
static int fuse_sync_fs(struct super_block *sb, int wait)
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	struct fuse_conn *fc = fm->fc;
	struct fuse_syncfs_in inarg;
	FUSE_ARGS(args);
	int err;

	/*
	 * Userspace cannot handle the wait == 0 case.  Avoid a
	 * gratuitous roundtrip.
	 */
	if (!wait)
		return 0;

	/* The filesystem is being unmounted.  Nothing to do. */
	if (!sb->s_root)
		return 0;

	if (!fc->sync_fs)
		return 0;

	fuse_sync_fs_writes(fc);

	memset(&inarg, 0, sizeof(inarg));
	args.in_numargs = 1;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.opcode = FUSE_SYNCFS;
	args.nodeid = get_node_id(sb->s_root->d_inode);
	args.out_numargs = 0;

	err = fuse_simple_request(fm, &args);
	if (err == -ENOSYS) {
		/* Remember that the server lacks SYNCFS; skip it next time */
		fc->sync_fs = 0;
		err = 0;
	}

	return err;
}

M
Miklos Szeredi 已提交
600
/* Mount option identifiers, referenced by fuse_fs_parameters[] */
enum {
	OPT_SOURCE,
	OPT_SUBTYPE,
	OPT_FD,
	OPT_ROOTMODE,
	OPT_USER_ID,
	OPT_GROUP_ID,
	OPT_DEFAULT_PERMISSIONS,
	OPT_ALLOW_OTHER,
	OPT_MAX_READ,
	OPT_BLKSIZE,
	OPT_ERR
};

614
static const struct fs_parameter_spec fuse_fs_parameters[] = {
615 616 617 618 619 620 621 622 623
	fsparam_string	("source",		OPT_SOURCE),
	fsparam_u32	("fd",			OPT_FD),
	fsparam_u32oct	("rootmode",		OPT_ROOTMODE),
	fsparam_u32	("user_id",		OPT_USER_ID),
	fsparam_u32	("group_id",		OPT_GROUP_ID),
	fsparam_flag	("default_permissions",	OPT_DEFAULT_PERMISSIONS),
	fsparam_flag	("allow_other",		OPT_ALLOW_OTHER),
	fsparam_u32	("max_read",		OPT_MAX_READ),
	fsparam_u32	("blksize",		OPT_BLKSIZE),
624
	fsparam_string	("subtype",		OPT_SUBTYPE),
625 626 627
	{}
};

628
static int fuse_parse_param(struct fs_context *fsc, struct fs_parameter *param)
629
{
630
	struct fs_parse_result result;
631
	struct fuse_fs_context *ctx = fsc->fs_private;
632 633
	int opt;

634
	if (fsc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
635 636 637 638
		/*
		 * Ignore options coming from mount(MS_REMOUNT) for backward
		 * compatibility.
		 */
639
		if (fsc->oldapi)
640 641
			return 0;

642
		return invalfc(fsc, "No changes allowed in reconfigure");
643
	}
644

645
	opt = fs_parse(fsc, fuse_fs_parameters, param, &result);
646 647 648 649 650
	if (opt < 0)
		return opt;

	switch (opt) {
	case OPT_SOURCE:
651 652 653
		if (fsc->source)
			return invalfc(fsc, "Multiple sources specified");
		fsc->source = param->string;
654 655 656 657 658
		param->string = NULL;
		break;

	case OPT_SUBTYPE:
		if (ctx->subtype)
659
			return invalfc(fsc, "Multiple subtypes specified");
660 661 662 663 664 665
		ctx->subtype = param->string;
		param->string = NULL;
		return 0;

	case OPT_FD:
		ctx->fd = result.uint_32;
666
		ctx->fd_present = true;
667 668 669 670
		break;

	case OPT_ROOTMODE:
		if (!fuse_valid_type(result.uint_32))
671
			return invalfc(fsc, "Invalid rootmode");
672
		ctx->rootmode = result.uint_32;
673
		ctx->rootmode_present = true;
674 675 676
		break;

	case OPT_USER_ID:
677
		ctx->user_id = make_kuid(fsc->user_ns, result.uint_32);
678
		if (!uid_valid(ctx->user_id))
679
			return invalfc(fsc, "Invalid user_id");
680
		ctx->user_id_present = true;
681 682 683
		break;

	case OPT_GROUP_ID:
684
		ctx->group_id = make_kgid(fsc->user_ns, result.uint_32);
685
		if (!gid_valid(ctx->group_id))
686
			return invalfc(fsc, "Invalid group_id");
687
		ctx->group_id_present = true;
688 689 690
		break;

	case OPT_DEFAULT_PERMISSIONS:
691
		ctx->default_permissions = true;
692 693 694
		break;

	case OPT_ALLOW_OTHER:
695
		ctx->allow_other = true;
696 697 698 699 700 701 702 703
		break;

	case OPT_MAX_READ:
		ctx->max_read = result.uint_32;
		break;

	case OPT_BLKSIZE:
		if (!ctx->is_bdev)
704
			return invalfc(fsc, "blksize only supported for fuseblk");
705 706 707 708 709
		ctx->blksize = result.uint_32;
		break;

	default:
		return -EINVAL;
710
	}
711 712

	return 0;
713 714
}

715
static void fuse_free_fsc(struct fs_context *fsc)
M
Miklos Szeredi 已提交
716
{
717
	struct fuse_fs_context *ctx = fsc->fs_private;
718

719 720 721 722
	if (ctx) {
		kfree(ctx->subtype);
		kfree(ctx);
	}
M
Miklos Szeredi 已提交
723 724
}

725
static int fuse_show_options(struct seq_file *m, struct dentry *root)
M
Miklos Szeredi 已提交
726
{
727 728
	struct super_block *sb = root->d_sb;
	struct fuse_conn *fc = get_fuse_conn_super(sb);
M
Miklos Szeredi 已提交
729

730 731 732 733 734 735 736 737 738 739 740 741 742 743
	if (fc->legacy_opts_show) {
		seq_printf(m, ",user_id=%u",
			   from_kuid_munged(fc->user_ns, fc->user_id));
		seq_printf(m, ",group_id=%u",
			   from_kgid_munged(fc->user_ns, fc->group_id));
		if (fc->default_permissions)
			seq_puts(m, ",default_permissions");
		if (fc->allow_other)
			seq_puts(m, ",allow_other");
		if (fc->max_read != ~0)
			seq_printf(m, ",max_read=%u", fc->max_read);
		if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE)
			seq_printf(m, ",blksize=%lu", sb->s_blocksize);
	}
744 745 746 747
#ifdef CONFIG_FUSE_DAX
	if (fc->dax)
		seq_puts(m, ",dax");
#endif
748

M
Miklos Szeredi 已提交
749 750 751
	return 0;
}

752 753 754
static void fuse_iqueue_init(struct fuse_iqueue *fiq,
			     const struct fuse_iqueue_ops *ops,
			     void *priv)
M
Miklos Szeredi 已提交
755 756
{
	memset(fiq, 0, sizeof(struct fuse_iqueue));
757
	spin_lock_init(&fiq->lock);
M
Miklos Szeredi 已提交
758 759 760 761
	init_waitqueue_head(&fiq->waitq);
	INIT_LIST_HEAD(&fiq->pending);
	INIT_LIST_HEAD(&fiq->interrupts);
	fiq->forget_list_tail = &fiq->forget_list_head;
762
	fiq->connected = 1;
763 764
	fiq->ops = ops;
	fiq->priv = priv;
M
Miklos Szeredi 已提交
765 766
}

767 768
static void fuse_pqueue_init(struct fuse_pqueue *fpq)
{
769 770
	unsigned int i;

M
Miklos Szeredi 已提交
771
	spin_lock_init(&fpq->lock);
772 773
	for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
		INIT_LIST_HEAD(&fpq->processing[i]);
774
	INIT_LIST_HEAD(&fpq->io);
775
	fpq->connected = 1;
776 777
}

778 779
void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
		    struct user_namespace *user_ns,
780
		    const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv)
M
Miklos Szeredi 已提交
781
{
782 783
	memset(fc, 0, sizeof(*fc));
	spin_lock_init(&fc->lock);
K
Kirill Tkhai 已提交
784
	spin_lock_init(&fc->bg_lock);
J
John Muir 已提交
785
	init_rwsem(&fc->killsb);
786
	refcount_set(&fc->count, 1);
787
	atomic_set(&fc->dev_count, 1);
788
	init_waitqueue_head(&fc->blocked_waitq);
789
	fuse_iqueue_init(&fc->iq, fiq_ops, fiq_priv);
790 791
	INIT_LIST_HEAD(&fc->bg_queue);
	INIT_LIST_HEAD(&fc->entry);
792
	INIT_LIST_HEAD(&fc->devices);
793
	atomic_set(&fc->num_waiting, 0);
794 795
	fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND;
	fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD;
M
Miklos Szeredi 已提交
796
	atomic64_set(&fc->khctr, 0);
797
	fc->polled_files = RB_ROOT;
798
	fc->blocked = 0;
M
Maxim Patlasov 已提交
799
	fc->initialized = 0;
800
	fc->connected = 1;
801
	atomic64_set(&fc->attr_version, 1);
802
	get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
803
	fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
804
	fc->user_ns = get_user_ns(user_ns);
M
Miklos Szeredi 已提交
805
	fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
806
	fc->max_pages_limit = FUSE_MAX_MAX_PAGES;
807 808 809 810

	INIT_LIST_HEAD(&fc->mounts);
	list_add(&fm->fc_entry, &fc->mounts);
	fm->fc = fc;
M
Miklos Szeredi 已提交
811
}
812
EXPORT_SYMBOL_GPL(fuse_conn_init);
M
Miklos Szeredi 已提交
813

814 815
void fuse_conn_put(struct fuse_conn *fc)
{
816
	if (refcount_dec_and_test(&fc->count)) {
817
		struct fuse_iqueue *fiq = &fc->iq;
818
		struct fuse_sync_bucket *bucket;
819

820 821
		if (IS_ENABLED(CONFIG_FUSE_DAX))
			fuse_dax_conn_free(fc);
822 823
		if (fiq->ops->release)
			fiq->ops->release(fiq);
824
		put_pid_ns(fc->pid_ns);
825
		put_user_ns(fc->user_ns);
826 827 828 829 830
		bucket = rcu_dereference_protected(fc->curr_bucket, 1);
		if (bucket) {
			WARN_ON(atomic_read(&bucket->count) != 1);
			kfree(bucket);
		}
T
Tejun Heo 已提交
831
		fc->release(fc);
832
	}
833
}
834
EXPORT_SYMBOL_GPL(fuse_conn_put);
835 836 837

struct fuse_conn *fuse_conn_get(struct fuse_conn *fc)
{
838
	refcount_inc(&fc->count);
839 840
	return fc;
}
841
EXPORT_SYMBOL_GPL(fuse_conn_get);
842

843
static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode)
M
Miklos Szeredi 已提交
844 845 846 847 848 849
{
	struct fuse_attr attr;
	memset(&attr, 0, sizeof(attr));

	attr.mode = mode;
	attr.ino = FUSE_ROOT_ID;
850
	attr.nlink = 1;
851
	return fuse_iget(sb, 1, 0, &attr, 0, 0);
M
Miklos Szeredi 已提交
852 853
}

M
Miklos Szeredi 已提交
854
struct fuse_inode_handle {
M
Miklos Szeredi 已提交
855 856 857 858 859 860 861
	u64 nodeid;
	u32 generation;
};

static struct dentry *fuse_get_dentry(struct super_block *sb,
				      struct fuse_inode_handle *handle)
{
862
	struct fuse_conn *fc = get_fuse_conn_super(sb);
M
Miklos Szeredi 已提交
863 864 865 866 867 868 869 870
	struct inode *inode;
	struct dentry *entry;
	int err = -ESTALE;

	if (handle->nodeid == 0)
		goto out_err;

	inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid);
871 872
	if (!inode) {
		struct fuse_entry_out outarg;
A
Al Viro 已提交
873
		const struct qstr name = QSTR_INIT(".", 1);
874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889

		if (!fc->export_support)
			goto out_err;

		err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg,
				       &inode);
		if (err && err != -ENOENT)
			goto out_err;
		if (err || !inode) {
			err = -ESTALE;
			goto out_err;
		}
		err = -EIO;
		if (get_node_id(inode) != handle->nodeid)
			goto out_iput;
	}
M
Miklos Szeredi 已提交
890 891 892 893
	err = -ESTALE;
	if (inode->i_generation != handle->generation)
		goto out_iput;

894
	entry = d_obtain_alias(inode);
A
Al Viro 已提交
895
	if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID)
M
Miklos Szeredi 已提交
896 897 898 899 900 901 902 903 904 905
		fuse_invalidate_entry_cache(entry);

	return entry;

 out_iput:
	iput(inode);
 out_err:
	return ERR_PTR(err);
}

A
Al Viro 已提交
906 907
/*
 * export_operations.encode_fh: encode @inode (and @parent, if given) into
 * the 32-bit words at @fh.
 *
 * Layout, in 32-bit words: nodeid high, nodeid low, generation; the same
 * triple for the parent follows when @parent is non-NULL.
 *
 * @max_len: in: capacity of @fh in words; out: number of words needed
 *           (3 without parent, 6 with).
 *
 * Returns 0x81 (no parent) or 0x82 (with parent), or FILEID_INVALID when
 * @fh is too small.
 */
static int fuse_encode_fh(struct inode *inode, u32 *fh, int *max_len,
			   struct inode *parent)
{
	int len = parent ? 6 : 3;
	u64 nodeid;
	u32 generation;

	if (*max_len < len) {
		*max_len = len;
		return FILEID_INVALID;
	}

	nodeid = get_fuse_inode(inode)->nodeid;
	generation = inode->i_generation;

	fh[0] = (u32)(nodeid >> 32);
	fh[1] = (u32)(nodeid & 0xffffffff);
	fh[2] = generation;

	if (parent) {
		nodeid = get_fuse_inode(parent)->nodeid;
		generation = parent->i_generation;

		fh[3] = (u32)(nodeid >> 32);
		fh[4] = (u32)(nodeid & 0xffffffff);
		fh[5] = generation;
	}

	*max_len = len;
	return parent ? 0x82 : 0x81;
}

static struct dentry *fuse_fh_to_dentry(struct super_block *sb,
		struct fid *fid, int fh_len, int fh_type)
{
	struct fuse_inode_handle handle;

	if ((fh_type != 0x81 && fh_type != 0x82) || fh_len < 3)
		return NULL;

	handle.nodeid = (u64) fid->raw[0] << 32;
	handle.nodeid |= (u64) fid->raw[1];
	handle.generation = fid->raw[2];
	return fuse_get_dentry(sb, &handle);
}

static struct dentry *fuse_fh_to_parent(struct super_block *sb,
		struct fid *fid, int fh_len, int fh_type)
{
	struct fuse_inode_handle parent;

	if (fh_type != 0x82 || fh_len < 6)
		return NULL;

	parent.nodeid = (u64) fid->raw[3] << 32;
	parent.nodeid |= (u64) fid->raw[4];
	parent.generation = fid->raw[5];
	return fuse_get_dentry(sb, &parent);
}

966 967
static struct dentry *fuse_get_parent(struct dentry *child)
{
968
	struct inode *child_inode = d_inode(child);
969 970 971 972 973 974 975 976 977 978
	struct fuse_conn *fc = get_fuse_conn(child_inode);
	struct inode *inode;
	struct dentry *parent;
	struct fuse_entry_out outarg;
	int err;

	if (!fc->export_support)
		return ERR_PTR(-ESTALE);

	err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode),
A
Al Viro 已提交
979
			       &dotdot_name, &outarg, &inode);
980 981 982
	if (err) {
		if (err == -ENOENT)
			return ERR_PTR(-ESTALE);
983 984
		return ERR_PTR(err);
	}
985 986

	parent = d_obtain_alias(inode);
A
Al Viro 已提交
987
	if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID)
988 989 990 991
		fuse_invalidate_entry_cache(parent);

	return parent;
}
M
Miklos Szeredi 已提交
992 993 994 995 996

static const struct export_operations fuse_export_operations = {
	.fh_to_dentry	= fuse_fh_to_dentry,
	.fh_to_parent	= fuse_fh_to_parent,
	.encode_fh	= fuse_encode_fh,
997
	.get_parent	= fuse_get_parent,
M
Miklos Szeredi 已提交
998 999
};

1000
static const struct super_operations fuse_super_operations = {
M
Miklos Szeredi 已提交
1001
	.alloc_inode    = fuse_alloc_inode,
A
Al Viro 已提交
1002
	.free_inode     = fuse_free_inode,
1003
	.evict_inode	= fuse_evict_inode,
M
Miklos Szeredi 已提交
1004
	.write_inode	= fuse_write_inode,
M
Miklos Szeredi 已提交
1005
	.drop_inode	= generic_delete_inode,
M
Miklos Szeredi 已提交
1006
	.put_super	= fuse_put_super,
1007
	.umount_begin	= fuse_umount_begin,
1008
	.statfs		= fuse_statfs,
1009
	.sync_fs	= fuse_sync_fs,
M
Miklos Szeredi 已提交
1010 1011 1012
	.show_options	= fuse_show_options,
};

1013 1014
static void sanitize_global_limit(unsigned *limit)
{
M
Miklos Szeredi 已提交
1015 1016 1017 1018
	/*
	 * The default maximum number of async requests is calculated to consume
	 * 1/2^13 of the total memory, assuming 392 bytes per request.
	 */
1019
	if (*limit == 0)
M
Miklos Szeredi 已提交
1020
		*limit = ((totalram_pages() << PAGE_SHIFT) >> 13) / 392;
1021 1022 1023 1024 1025

	if (*limit >= 1 << 16)
		*limit = (1 << 16) - 1;
}

1026
static int set_global_limit(const char *val, const struct kernel_param *kp)
1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048
{
	int rv;

	rv = param_set_uint(val, kp);
	if (rv)
		return rv;

	sanitize_global_limit((unsigned *)kp->arg);

	return 0;
}

static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg)
{
	int cap_sys_admin = capable(CAP_SYS_ADMIN);

	if (arg->minor < 13)
		return;

	sanitize_global_limit(&max_user_bgreq);
	sanitize_global_limit(&max_user_congthresh);

K
Kirill Tkhai 已提交
1049
	spin_lock(&fc->bg_lock);
1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062
	if (arg->max_background) {
		fc->max_background = arg->max_background;

		if (!cap_sys_admin && fc->max_background > max_user_bgreq)
			fc->max_background = max_user_bgreq;
	}
	if (arg->congestion_threshold) {
		fc->congestion_threshold = arg->congestion_threshold;

		if (!cap_sys_admin &&
		    fc->congestion_threshold > max_user_congthresh)
			fc->congestion_threshold = max_user_congthresh;
	}
K
Kirill Tkhai 已提交
1063
	spin_unlock(&fc->bg_lock);
1064 1065
}

1066 1067 1068 1069 1070 1071
/*
 * Everything needed for one FUSE_INIT exchange, allocated as a single
 * object by fuse_send_init().  process_init_reply() recovers the bundle
 * from the embedded @args via container_of() and frees it.
 */
struct fuse_init_args {
	struct fuse_args args;		/* request descriptor passed to fuse_simple_background() */
	struct fuse_init_in in;		/* INIT request payload (in_args[0]) */
	struct fuse_init_out out;	/* INIT reply payload (out_args[0]) */
};

1072
static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
1073
			       int error)
1074
{
1075
	struct fuse_conn *fc = fm->fc;
1076 1077
	struct fuse_init_args *ia = container_of(args, typeof(*ia), args);
	struct fuse_init_out *arg = &ia->out;
1078
	bool ok = true;
1079

1080
	if (error || arg->major != FUSE_KERNEL_VERSION)
1081
		ok = false;
1082
	else {
1083 1084
		unsigned long ra_pages;

1085 1086
		process_init_limits(fc, arg);

1087
		if (arg->minor >= 6) {
1088
			ra_pages = arg->max_readahead / PAGE_SIZE;
1089 1090
			if (arg->flags & FUSE_ASYNC_READ)
				fc->async_read = 1;
1091 1092
			if (!(arg->flags & FUSE_POSIX_LOCKS))
				fc->no_lock = 1;
M
Miklos Szeredi 已提交
1093 1094 1095
			if (arg->minor >= 17) {
				if (!(arg->flags & FUSE_FLOCK_LOCKS))
					fc->no_flock = 1;
M
Miklos Szeredi 已提交
1096 1097 1098
			} else {
				if (!(arg->flags & FUSE_POSIX_LOCKS))
					fc->no_flock = 1;
M
Miklos Szeredi 已提交
1099
			}
1100 1101
			if (arg->flags & FUSE_ATOMIC_O_TRUNC)
				fc->atomic_o_trunc = 1;
1102 1103 1104 1105 1106
			if (arg->minor >= 9) {
				/* LOOKUP has dependency on proto version */
				if (arg->flags & FUSE_EXPORT_SUPPORT)
					fc->export_support = 1;
			}
1107 1108
			if (arg->flags & FUSE_BIG_WRITES)
				fc->big_writes = 1;
1109 1110
			if (arg->flags & FUSE_DONT_MASK)
				fc->dont_mask = 1;
1111 1112
			if (arg->flags & FUSE_AUTO_INVAL_DATA)
				fc->auto_inval_data = 1;
1113 1114
			else if (arg->flags & FUSE_EXPLICIT_INVAL_DATA)
				fc->explicit_inval_data = 1;
1115
			if (arg->flags & FUSE_DO_READDIRPLUS) {
1116
				fc->do_readdirplus = 1;
1117 1118 1119
				if (arg->flags & FUSE_READDIRPLUS_AUTO)
					fc->readdirplus_auto = 1;
			}
1120 1121
			if (arg->flags & FUSE_ASYNC_DIO)
				fc->async_dio = 1;
P
Pavel Emelyanov 已提交
1122 1123
			if (arg->flags & FUSE_WRITEBACK_CACHE)
				fc->writeback_cache = 1;
1124 1125
			if (arg->flags & FUSE_PARALLEL_DIROPS)
				fc->parallel_dirops = 1;
1126 1127
			if (arg->flags & FUSE_HANDLE_KILLPRIV)
				fc->handle_killpriv = 1;
1128
			if (arg->time_gran && arg->time_gran <= 1000000000)
1129
				fm->sb->s_time_gran = arg->time_gran;
S
Seth Forshee 已提交
1130
			if ((arg->flags & FUSE_POSIX_ACL)) {
M
Miklos Szeredi 已提交
1131
				fc->default_permissions = 1;
S
Seth Forshee 已提交
1132
				fc->posix_acl = 1;
1133
				fm->sb->s_xattr = fuse_acl_xattr_handlers;
S
Seth Forshee 已提交
1134
			}
D
Dan Schatzberg 已提交
1135 1136
			if (arg->flags & FUSE_CACHE_SYMLINKS)
				fc->cache_symlinks = 1;
1137 1138
			if (arg->flags & FUSE_ABORT_ERROR)
				fc->abort_err = 1;
1139 1140
			if (arg->flags & FUSE_MAX_PAGES) {
				fc->max_pages =
1141
					min_t(unsigned int, fc->max_pages_limit,
1142 1143
					max_t(unsigned int, arg->max_pages, 1));
			}
1144 1145 1146 1147 1148
			if (IS_ENABLED(CONFIG_FUSE_DAX) &&
			    arg->flags & FUSE_MAP_ALIGNMENT &&
			    !fuse_dax_check_alignment(fc, arg->map_alignment)) {
				ok = false;
			}
1149
			if (arg->flags & FUSE_HANDLE_KILLPRIV_V2) {
1150
				fc->handle_killpriv_v2 = 1;
1151 1152
				fm->sb->s_flags |= SB_NOSEC;
			}
V
Vivek Goyal 已提交
1153 1154
			if (arg->flags & FUSE_SETXATTR_EXT)
				fc->setxattr_ext = 1;
1155
		} else {
1156
			ra_pages = fc->max_read / PAGE_SIZE;
1157
			fc->no_lock = 1;
M
Miklos Szeredi 已提交
1158
			fc->no_flock = 1;
1159
		}
1160

1161 1162
		fm->sb->s_bdi->ra_pages =
				min(fm->sb->s_bdi->ra_pages, ra_pages);
1163 1164
		fc->minor = arg->minor;
		fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
1165
		fc->max_write = max_t(unsigned, 4096, fc->max_write);
1166
		fc->conn_init = 1;
1167
	}
1168 1169
	kfree(ia);

1170 1171 1172 1173 1174
	if (!ok) {
		fc->conn_init = 0;
		fc->conn_error = 1;
	}

1175
	fuse_set_initialized(fc);
1176
	wake_up_all(&fc->blocked_waitq);
1177 1178
}

1179
void fuse_send_init(struct fuse_mount *fm)
1180
{
1181
	struct fuse_init_args *ia;
M
Miklos Szeredi 已提交
1182

1183 1184 1185 1186
	ia = kzalloc(sizeof(*ia), GFP_KERNEL | __GFP_NOFAIL);

	ia->in.major = FUSE_KERNEL_VERSION;
	ia->in.minor = FUSE_KERNEL_MINOR_VERSION;
1187
	ia->in.max_readahead = fm->sb->s_bdi->ra_pages * PAGE_SIZE;
1188 1189
	ia->in.flags |=
		FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
M
Miklos Szeredi 已提交
1190
		FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
M
Miklos Szeredi 已提交
1191
		FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
1192
		FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
P
Pavel Emelyanov 已提交
1193
		FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
1194
		FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
1195
		FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |
1196
		FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS |
1197
		FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA |
V
Vivek Goyal 已提交
1198
		FUSE_HANDLE_KILLPRIV_V2 | FUSE_SETXATTR_EXT;
1199
#ifdef CONFIG_FUSE_DAX
1200
	if (fm->fc->dax)
1201 1202
		ia->in.flags |= FUSE_MAP_ALIGNMENT;
#endif
M
Max Reitz 已提交
1203 1204 1205
	if (fm->fc->auto_submounts)
		ia->in.flags |= FUSE_SUBMOUNTS;

1206 1207 1208 1209 1210
	ia->args.opcode = FUSE_INIT;
	ia->args.in_numargs = 1;
	ia->args.in_args[0].size = sizeof(ia->in);
	ia->args.in_args[0].value = &ia->in;
	ia->args.out_numargs = 1;
D
Daniel Mack 已提交
1211
	/* Variable length argument used for backward compatibility
1212 1213
	   with interface version < 7.5.  Rest of init_out is zeroed
	   by do_get_request(), so a short reply is not a problem */
1214
	ia->args.out_argvar = true;
1215 1216 1217 1218 1219 1220
	ia->args.out_args[0].size = sizeof(ia->out);
	ia->args.out_args[0].value = &ia->out;
	ia->args.force = true;
	ia->args.nocreds = true;
	ia->args.end = process_init_reply;

1221 1222
	if (fuse_simple_background(fm, &ia->args, GFP_KERNEL) != 0)
		process_init_reply(fm, &ia->args, -ENOTCONN);
1223
}
1224
EXPORT_SYMBOL_GPL(fuse_send_init);
1225

1226
void fuse_free_conn(struct fuse_conn *fc)
T
Tejun Heo 已提交
1227
{
1228
	WARN_ON(!list_empty(&fc->devices));
A
Al Viro 已提交
1229
	kfree_rcu(fc, rcu);
T
Tejun Heo 已提交
1230
}
1231
EXPORT_SYMBOL_GPL(fuse_free_conn);
T
Tejun Heo 已提交
1232

1233 1234 1235
static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
{
	int err;
1236
	char *suffix = "";
1237

1238
	if (sb->s_bdev) {
1239
		suffix = "-fuseblk";
1240 1241 1242 1243 1244 1245 1246
		/*
		 * sb->s_bdi points to blkdev's bdi however we want to redirect
		 * it to our private bdi...
		 */
		bdi_put(sb->s_bdi);
		sb->s_bdi = &noop_backing_dev_info;
	}
1247 1248
	err = super_setup_bdi_name(sb, "%u:%u%s", MAJOR(fc->dev),
				   MINOR(fc->dev), suffix);
1249 1250 1251
	if (err)
		return err;

1252
	/* fuse does it's own writeback accounting */
1253 1254
	sb->s_bdi->capabilities &= ~BDI_CAP_WRITEBACK_ACCT;
	sb->s_bdi->capabilities |= BDI_CAP_STRICTLIMIT;
1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267

	/*
	 * For a single fuse filesystem use max 1% of dirty +
	 * writeback threshold.
	 *
	 * This gives about 1M of write buffer for memory maps on a
	 * machine with 1G and 10% dirty_ratio, which should be more
	 * than enough.
	 *
	 * Privileged users can raise it by writing to
	 *
	 *    /sys/class/bdi/<bdi>/max_ratio
	 */
1268
	bdi_set_max_ratio(sb->s_bdi, 1);
1269 1270 1271 1272

	return 0;
}

1273
struct fuse_dev *fuse_dev_alloc(void)
1274 1275
{
	struct fuse_dev *fud;
1276
	struct list_head *pq;
1277 1278

	fud = kzalloc(sizeof(struct fuse_dev), GFP_KERNEL);
1279 1280
	if (!fud)
		return NULL;
1281

1282 1283 1284 1285
	pq = kcalloc(FUSE_PQ_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL);
	if (!pq) {
		kfree(fud);
		return NULL;
1286 1287
	}

1288 1289 1290
	fud->pq.processing = pq;
	fuse_pqueue_init(&fud->pq);

1291 1292 1293 1294 1295 1296 1297
	return fud;
}
EXPORT_SYMBOL_GPL(fuse_dev_alloc);

/*
 * Bind @fud to @fc: take a connection reference for the device and link the
 * device onto fc->devices under fc->lock.
 */
void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc)
{
	fud->fc = fuse_conn_get(fc);

	spin_lock(&fc->lock);
	list_add_tail(&fud->entry, &fc->devices);
	spin_unlock(&fc->lock);
}
EXPORT_SYMBOL_GPL(fuse_dev_install);
1303

1304 1305 1306 1307 1308 1309 1310 1311 1312
/*
 * Convenience wrapper: fuse_dev_alloc() followed by fuse_dev_install().
 * Returns the new device, or NULL if allocation failed.
 */
struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc)
{
	struct fuse_dev *fud = fuse_dev_alloc();

	if (fud)
		fuse_dev_install(fud, fc);

	return fud;
}
1315
EXPORT_SYMBOL_GPL(fuse_dev_alloc_install);
1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327

/*
 * Free a fuse_dev.  If it was installed on a connection, unlink it from the
 * connection's device list and drop the reference taken at install time.
 */
void fuse_dev_free(struct fuse_dev *fud)
{
	struct fuse_conn *conn = fud->fc;

	if (conn) {
		spin_lock(&conn->lock);
		list_del(&fud->entry);
		spin_unlock(&conn->lock);

		fuse_conn_put(conn);
	}

	kfree(fud->pq.processing);
	kfree(fud);
}
EXPORT_SYMBOL_GPL(fuse_dev_free);

1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375
static void fuse_fill_attr_from_inode(struct fuse_attr *attr,
				      const struct fuse_inode *fi)
{
	*attr = (struct fuse_attr){
		.ino		= fi->inode.i_ino,
		.size		= fi->inode.i_size,
		.blocks		= fi->inode.i_blocks,
		.atime		= fi->inode.i_atime.tv_sec,
		.mtime		= fi->inode.i_mtime.tv_sec,
		.ctime		= fi->inode.i_ctime.tv_sec,
		.atimensec	= fi->inode.i_atime.tv_nsec,
		.mtimensec	= fi->inode.i_mtime.tv_nsec,
		.ctimensec	= fi->inode.i_ctime.tv_nsec,
		.mode		= fi->inode.i_mode,
		.nlink		= fi->inode.i_nlink,
		.uid		= fi->inode.i_uid.val,
		.gid		= fi->inode.i_gid.val,
		.rdev		= fi->inode.i_rdev,
		.blksize	= 1u << fi->inode.i_blkbits,
	};
}

static void fuse_sb_defaults(struct super_block *sb)
{
	sb->s_magic = FUSE_SUPER_MAGIC;
	sb->s_op = &fuse_super_operations;
	sb->s_xattr = fuse_xattr_handlers;
	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sb->s_time_gran = 1;
	sb->s_export_op = &fuse_export_operations;
	sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE;
	if (sb->s_user_ns != &init_user_ns)
		sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;
	sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);

	/*
	 * If we are not in the initial user namespace posix
	 * acls must be translated.
	 */
	if (sb->s_user_ns != &init_user_ns)
		sb->s_xattr = fuse_no_acl_xattr_handlers;
}

1376 1377
static int fuse_fill_super_submount(struct super_block *sb,
				    struct fuse_inode *parent_fi)
1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	struct super_block *parent_sb = parent_fi->inode.i_sb;
	struct fuse_attr root_attr;
	struct inode *root;

	fuse_sb_defaults(sb);
	fm->sb = sb;

	WARN_ON(sb->s_bdi != &noop_backing_dev_info);
	sb->s_bdi = bdi_get(parent_sb->s_bdi);

	sb->s_xattr = parent_sb->s_xattr;
	sb->s_time_gran = parent_sb->s_time_gran;
	sb->s_blocksize = parent_sb->s_blocksize;
	sb->s_blocksize_bits = parent_sb->s_blocksize_bits;
	sb->s_subtype = kstrdup(parent_sb->s_subtype, GFP_KERNEL);
	if (parent_sb->s_subtype && !sb->s_subtype)
		return -ENOMEM;

	fuse_fill_attr_from_inode(&root_attr, parent_fi);
	root = fuse_iget(sb, parent_fi->nodeid, 0, &root_attr, 0, 0);
	/*
	 * This inode is just a duplicate, so it is not looked up and
	 * its nlookup should not be incremented.  fuse_iget() does
	 * that, though, so undo it here.
	 */
	get_fuse_inode(root)->nlookup--;
	sb->s_d_op = &fuse_dentry_operations;
	sb->s_root = d_make_root(root);
	if (!sb->s_root)
		return -ENOMEM;

	return 0;
}

1414
/* Filesystem context private data holds the FUSE inode of the mount point */
1415 1416
static int fuse_get_tree_submount(struct fs_context *fsc)
{
1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451
	struct fuse_mount *fm;
	struct fuse_inode *mp_fi = fsc->fs_private;
	struct fuse_conn *fc = get_fuse_conn(&mp_fi->inode);
	struct super_block *sb;
	int err;

	fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL);
	if (!fm)
		return -ENOMEM;

	fsc->s_fs_info = fm;
	sb = sget_fc(fsc, NULL, set_anon_super_fc);
	if (IS_ERR(sb)) {
		kfree(fm);
		return PTR_ERR(sb);
	}
	fm->fc = fuse_conn_get(fc);

	/* Initialize superblock, making @mp_fi its root */
	err = fuse_fill_super_submount(sb, mp_fi);
	if (err) {
		fuse_conn_put(fc);
		kfree(fm);
		sb->s_fs_info = NULL;
		deactivate_locked_super(sb);
		return err;
	}

	down_write(&fc->killsb);
	list_add_tail(&fm->fc_entry, &fc->mounts);
	up_write(&fc->killsb);

	sb->s_flags |= SB_ACTIVE;
	fsc->root = dget(sb->s_root);

1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465
	return 0;
}

/* Context operations for submounts; installed by fuse_init_fs_context_submount(). */
static const struct fs_context_operations fuse_context_submount_ops = {
	.get_tree	= fuse_get_tree_submount,
};

/* Point @fsc at the submount context operations.  Always returns 0. */
int fuse_init_fs_context_submount(struct fs_context *fsc)
{
	fsc->ops = &fuse_context_submount_ops;
	return 0;
}
EXPORT_SYMBOL_GPL(fuse_init_fs_context_submount);

1466
int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
M
Miklos Szeredi 已提交
1467
{
1468
	struct fuse_dev *fud = NULL;
1469 1470
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	struct fuse_conn *fc = fm->fc;
M
Miklos Szeredi 已提交
1471
	struct inode *root;
1472
	struct dentry *root_dentry;
M
Miklos Szeredi 已提交
1473 1474
	int err;

1475
	err = -EINVAL;
1476
	if (sb->s_flags & SB_MANDLOCK)
1477
		goto err;
1478

1479
	rcu_assign_pointer(fc->curr_bucket, fuse_sync_bucket_alloc());
1480
	fuse_sb_defaults(sb);
A
Al Viro 已提交
1481

1482
	if (ctx->is_bdev) {
1483
#ifdef CONFIG_BLOCK
1484
		err = -EINVAL;
1485
		if (!sb_set_blocksize(sb, ctx->blksize))
1486
			goto err;
1487
#endif
M
Miklos Szeredi 已提交
1488
	} else {
1489 1490
		sb->s_blocksize = PAGE_SIZE;
		sb->s_blocksize_bits = PAGE_SHIFT;
M
Miklos Szeredi 已提交
1491
	}
1492 1493 1494

	sb->s_subtype = ctx->subtype;
	ctx->subtype = NULL;
1495 1496 1497 1498 1499
	if (IS_ENABLED(CONFIG_FUSE_DAX)) {
		err = fuse_dax_conn_alloc(fc, ctx->dax_dev);
		if (err)
			goto err;
	}
1500

1501 1502 1503 1504
	if (ctx->fudptr) {
		err = -ENOMEM;
		fud = fuse_dev_alloc_install(fc);
		if (!fud)
1505
			goto err_free_dax;
1506
	}
1507

1508
	fc->dev = sb->s_dev;
1509
	fm->sb = sb;
1510 1511
	err = fuse_bdi_init(fc, sb);
	if (err)
1512
		goto err_dev_free;
1513

1514
	/* Handle umasking inside the fuse code */
1515
	if (sb->s_flags & SB_POSIXACL)
1516
		fc->dont_mask = 1;
1517
	sb->s_flags |= SB_POSIXACL;
1518

1519 1520 1521 1522
	fc->default_permissions = ctx->default_permissions;
	fc->allow_other = ctx->allow_other;
	fc->user_id = ctx->user_id;
	fc->group_id = ctx->group_id;
1523
	fc->legacy_opts_show = ctx->legacy_opts_show;
1524
	fc->max_read = max_t(unsigned int, 4096, ctx->max_read);
1525
	fc->destroy = ctx->destroy;
1526 1527
	fc->no_control = ctx->no_control;
	fc->no_force_umount = ctx->no_force_umount;
1528

M
Miklos Szeredi 已提交
1529
	err = -ENOMEM;
1530
	root = fuse_get_root_inode(sb, ctx->rootmode);
1531
	sb->s_d_op = &fuse_root_dentry_operations;
1532 1533
	root_dentry = d_make_root(root);
	if (!root_dentry)
1534
		goto err_dev_free;
1535
	/* Root dentry doesn't have .d_revalidate */
A
Al Viro 已提交
1536
	sb->s_d_op = &fuse_dentry_operations;
1537

1538
	mutex_lock(&fuse_mutex);
1539
	err = -EINVAL;
1540
	if (ctx->fudptr && *ctx->fudptr)
1541
		goto err_unlock;
1542

1543 1544 1545 1546 1547
	err = fuse_ctl_add_conn(fc);
	if (err)
		goto err_unlock;

	list_add_tail(&fc->entry, &fuse_conn_list);
1548
	sb->s_root = root_dentry;
1549 1550
	if (ctx->fudptr)
		*ctx->fudptr = fud;
1551
	mutex_unlock(&fuse_mutex);
1552 1553 1554 1555 1556 1557
	return 0;

 err_unlock:
	mutex_unlock(&fuse_mutex);
	dput(root_dentry);
 err_dev_free:
1558 1559
	if (fud)
		fuse_dev_free(fud);
1560 1561 1562
 err_free_dax:
	if (IS_ENABLED(CONFIG_FUSE_DAX))
		fuse_dax_conn_free(fc);
1563 1564 1565 1566 1567 1568 1569 1570 1571 1572
 err:
	return err;
}
EXPORT_SYMBOL_GPL(fuse_fill_super_common);

static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
{
	struct fuse_fs_context *ctx = fsc->fs_private;
	int err;
	struct fuse_conn *fc;
1573
	struct fuse_mount *fm;
1574

1575
	if (!ctx->file || !ctx->rootmode_present ||
1576 1577
	    !ctx->user_id_present || !ctx->group_id_present)
		return -EINVAL;
1578 1579 1580 1581 1582

	/*
	 * Require mount to happen from the same user namespace which
	 * opened /dev/fuse to prevent potential attacks.
	 */
1583 1584 1585 1586 1587
	err = -EINVAL;
	if ((ctx->file->f_op != &fuse_dev_operations) ||
	    (ctx->file->f_cred->user_ns != sb->s_user_ns))
		goto err;
	ctx->fudptr = &ctx->file->private_data;
1588 1589 1590 1591

	fc = kmalloc(sizeof(*fc), GFP_KERNEL);
	err = -ENOMEM;
	if (!fc)
1592
		goto err;
1593

1594 1595 1596
	fm = kzalloc(sizeof(*fm), GFP_KERNEL);
	if (!fm) {
		kfree(fc);
1597
		goto err;
1598 1599 1600
	}

	fuse_conn_init(fc, fm, sb->s_user_ns, &fuse_dev_fiq_ops, NULL);
1601
	fc->release = fuse_free_conn;
1602 1603

	sb->s_fs_info = fm;
1604 1605 1606 1607

	err = fuse_fill_super_common(sb, ctx);
	if (err)
		goto err_put_conn;
1608 1609
	/* file->private_data shall be visible on all CPUs after this */
	smp_mb();
1610
	fuse_send_init(get_fuse_mount_super(sb));
M
Miklos Szeredi 已提交
1611 1612
	return 0;

1613
 err_put_conn:
1614 1615
	fuse_conn_put(fc);
	kfree(fm);
1616
	sb->s_fs_info = NULL;
1617
 err:
M
Miklos Szeredi 已提交
1618 1619 1620
	return err;
}

M
Miklos Szeredi 已提交
1621 1622 1623 1624 1625
/*
 * This is the path where user supplied an already initialized fuse dev.  In
 * this case never create a new super if the old one is gone.
 */
static int fuse_set_no_super(struct super_block *sb, struct fs_context *fsc)
M
Miklos Szeredi 已提交
1626
{
M
Miklos Szeredi 已提交
1627 1628
	return -ENOTCONN;
}
1629

M
Miklos Szeredi 已提交
1630 1631
static int fuse_test_super(struct super_block *sb, struct fs_context *fsc)
{
1632

M
Miklos Szeredi 已提交
1633 1634 1635
	return fsc->sget_key == get_fuse_conn_super(sb);
}

1636
static int fuse_get_tree(struct fs_context *fsc)
M
Miklos Szeredi 已提交
1637
{
1638
	struct fuse_fs_context *ctx = fsc->fs_private;
M
Miklos Szeredi 已提交
1639 1640
	struct fuse_dev *fud;
	struct super_block *sb;
1641
	int err;
1642

1643 1644
	if (ctx->fd_present)
		ctx->file = fget(ctx->fd);
1645

1646
	if (IS_ENABLED(CONFIG_BLOCK) && ctx->is_bdev) {
1647 1648
		err = get_tree_bdev(fsc, fuse_fill_super);
		goto out_fput;
1649
	}
M
Miklos Szeredi 已提交
1650 1651 1652 1653 1654 1655
	/*
	 * While block dev mount can be initialized with a dummy device fd
	 * (found by device name), normal fuse mounts can't
	 */
	if (!ctx->file)
		return -EINVAL;
1656

M
Miklos Szeredi 已提交
1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670
	/*
	 * Allow creating a fuse mount with an already initialized fuse
	 * connection
	 */
	fud = READ_ONCE(ctx->file->private_data);
	if (ctx->file->f_op == &fuse_dev_operations && fud) {
		fsc->sget_key = fud->fc;
		sb = sget_fc(fsc, fuse_test_super, fuse_set_no_super);
		err = PTR_ERR_OR_ZERO(sb);
		if (!IS_ERR(sb))
			fsc->root = dget(sb->s_root);
	} else {
		err = get_tree_nodev(fsc, fuse_fill_super);
	}
1671 1672 1673 1674
out_fput:
	if (ctx->file)
		fput(ctx->file);
	return err;
1675 1676 1677
}

static const struct fs_context_operations fuse_context_ops = {
1678
	.free		= fuse_free_fsc,
1679
	.parse_param	= fuse_parse_param,
1680
	.reconfigure	= fuse_reconfigure,
1681 1682 1683 1684 1685 1686
	.get_tree	= fuse_get_tree,
};

/*
 * Set up the filesystem mount context.
 */
1687
static int fuse_init_fs_context(struct fs_context *fsc)
1688 1689 1690 1691 1692 1693 1694 1695 1696
{
	struct fuse_fs_context *ctx;

	ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	ctx->max_read = ~0;
	ctx->blksize = FUSE_DEFAULT_BLKSIZE;
1697
	ctx->legacy_opts_show = true;
1698 1699

#ifdef CONFIG_BLOCK
1700
	if (fsc->fs_type == &fuseblk_fs_type) {
1701
		ctx->is_bdev = true;
1702 1703
		ctx->destroy = true;
	}
1704 1705
#endif

1706 1707
	fsc->fs_private = ctx;
	fsc->ops = &fuse_context_ops;
1708
	return 0;
M
Miklos Szeredi 已提交
1709 1710
}

1711
bool fuse_mount_remove(struct fuse_mount *fm)
J
John Muir 已提交
1712
{
1713 1714
	struct fuse_conn *fc = fm->fc;
	bool last = false;
J
John Muir 已提交
1715

1716 1717 1718 1719 1720
	down_write(&fc->killsb);
	list_del_init(&fm->fc_entry);
	if (list_empty(&fc->mounts))
		last = true;
	up_write(&fc->killsb);
1721

1722 1723 1724
	return last;
}
EXPORT_SYMBOL_GPL(fuse_mount_remove);
1725

1726 1727 1728 1729 1730 1731 1732 1733 1734
void fuse_conn_destroy(struct fuse_mount *fm)
{
	struct fuse_conn *fc = fm->fc;

	if (fc->destroy)
		fuse_send_destroy(fm);

	fuse_abort_conn(fc);
	fuse_wait_aborted(fc);
M
Miklos Szeredi 已提交
1735 1736 1737 1738 1739 1740

	if (!list_empty(&fc->entry)) {
		mutex_lock(&fuse_mutex);
		list_del(&fc->entry);
		fuse_ctl_remove_conn(fc);
		mutex_unlock(&fuse_mutex);
J
John Muir 已提交
1741
	}
1742
}
1743
EXPORT_SYMBOL_GPL(fuse_conn_destroy);
J
John Muir 已提交
1744

1745
/*
 * Common part of ->kill_sb(): detach the mount from its connection and
 * destroy the connection when this was its last mount.  A superblock
 * without a fuse_mount is left alone.
 */
static void fuse_sb_destroy(struct super_block *sb)
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);

	if (fm && fuse_mount_remove(fm))
		fuse_conn_destroy(fm);
}

/* ->kill_sb() for "fuse": FUSE teardown first, then the anonymous super. */
static void fuse_kill_sb_anon(struct super_block *sb)
{
	fuse_sb_destroy(sb);
	kill_anon_super(sb);
}

1763 1764 1765
static struct file_system_type fuse_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "fuse",
1766
	.fs_flags	= FS_HAS_SUBTYPE | FS_USERNS_MOUNT,
1767
	.init_fs_context = fuse_init_fs_context,
1768
	.parameters	= fuse_fs_parameters,
J
John Muir 已提交
1769
	.kill_sb	= fuse_kill_sb_anon,
1770
};
1771
MODULE_ALIAS_FS("fuse");
1772 1773

#ifdef CONFIG_BLOCK
J
John Muir 已提交
1774 1775
/* ->kill_sb() for "fuseblk": FUSE teardown first, then the block super. */
static void fuse_kill_sb_blk(struct super_block *sb)
{
	fuse_sb_destroy(sb);
	kill_block_super(sb);
}

1780 1781 1782
static struct file_system_type fuseblk_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "fuseblk",
1783
	.init_fs_context = fuse_init_fs_context,
1784
	.parameters	= fuse_fs_parameters,
J
John Muir 已提交
1785
	.kill_sb	= fuse_kill_sb_blk,
A
Alexey Dobriyan 已提交
1786
	.fs_flags	= FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
1787
};
1788
MODULE_ALIAS_FS("fuseblk");
1789

1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809
/* CONFIG_BLOCK build: register the "fuseblk" filesystem type. */
static inline int register_fuseblk(void)
{
	return register_filesystem(&fuseblk_fs_type);
}

/* CONFIG_BLOCK build: unregister the "fuseblk" filesystem type. */
static inline void unregister_fuseblk(void)
{
	unregister_filesystem(&fuseblk_fs_type);
}
#else
/* Build without CONFIG_BLOCK: nothing to register, report success. */
static inline int register_fuseblk(void)
{
	return 0;
}

/* Build without CONFIG_BLOCK: nothing to unregister. */
static inline void unregister_fuseblk(void)
{
}
#endif

1810
/*
 * Constructor passed to kmem_cache_create() for the fuse_inode cache;
 * @foo is handed to inode_init_once() as the VFS inode.
 */
static void fuse_inode_init_once(void *foo)
{
	inode_init_once(foo);
}

/*
 * Create the fuse_inode slab cache and register both filesystem types.
 * Returns 0 or a negative errno; on failure everything set up so far is
 * undone in reverse order.
 */
static int __init fuse_fs_init(void)
{
	int err;

	fuse_inode_cachep = kmem_cache_create("fuse_inode",
			sizeof(struct fuse_inode), 0,
			SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT|SLAB_RECLAIM_ACCOUNT,
			fuse_inode_init_once);
	if (!fuse_inode_cachep)
		return -ENOMEM;

	err = register_fuseblk();
	if (err)
		goto destroy_cache;

	err = register_filesystem(&fuse_fs_type);
	if (err)
		goto unregister_blk;

	return 0;

 unregister_blk:
	unregister_fuseblk();
 destroy_cache:
	kmem_cache_destroy(fuse_inode_cachep);
	return err;
}

static void fuse_fs_cleanup(void)
{
	unregister_filesystem(&fuse_fs_type);
1850
	unregister_fuseblk();
1851 1852 1853 1854 1855 1856

	/*
	 * Make sure all delayed rcu free inodes are flushed before we
	 * destroy cache.
	 */
	rcu_barrier();
M
Miklos Szeredi 已提交
1857 1858 1859
	kmem_cache_destroy(fuse_inode_cachep);
}

1860 1861
static struct kobject *fuse_kobj;

1862 1863 1864 1865
static int fuse_sysfs_init(void)
{
	int err;

1866
	fuse_kobj = kobject_create_and_add("fuse", fs_kobj);
1867 1868
	if (!fuse_kobj) {
		err = -ENOMEM;
1869
		goto out_err;
1870
	}
1871

1872 1873
	err = sysfs_create_mount_point(fuse_kobj, "connections");
	if (err)
1874 1875 1876 1877 1878
		goto out_fuse_unregister;

	return 0;

 out_fuse_unregister:
1879
	kobject_put(fuse_kobj);
1880 1881 1882 1883 1884 1885
 out_err:
	return err;
}

static void fuse_sysfs_cleanup(void)
{
1886
	sysfs_remove_mount_point(fuse_kobj, "connections");
1887
	kobject_put(fuse_kobj);
1888 1889
}

M
Miklos Szeredi 已提交
1890 1891 1892 1893
/*
 * Module entry point.  Brings up, in order: the filesystem types, the
 * device, the sysfs entries and the control filesystem, then sanitises the
 * global limits.  A failing step unwinds the steps completed before it in
 * reverse order and its error is returned.
 */
static int __init fuse_init(void)
{
	int res;

	pr_info("init (API version %i.%i)\n",
		FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION);

	INIT_LIST_HEAD(&fuse_conn_list);

	res = fuse_fs_init();
	if (res)
		return res;

	res = fuse_dev_init();
	if (res)
		goto undo_fs;

	res = fuse_sysfs_init();
	if (res)
		goto undo_dev;

	res = fuse_ctl_init();
	if (res)
		goto undo_sysfs;

	sanitize_global_limit(&max_user_bgreq);
	sanitize_global_limit(&max_user_congthresh);

	return 0;

 undo_sysfs:
	fuse_sysfs_cleanup();
 undo_dev:
	fuse_dev_cleanup();
 undo_fs:
	fuse_fs_cleanup();
	return res;
}

/*
 * Module exit point: tear down the control filesystem, the sysfs entries,
 * the filesystem types and finally the device.
 */
static void __exit fuse_exit(void)
{
	pr_debug("exit\n");

	fuse_ctl_cleanup();
	fuse_sysfs_cleanup();
	fuse_fs_cleanup();
	fuse_dev_cleanup();
}

module_init(fuse_init);
module_exit(fuse_exit);