inode.c 38.1 KB
Newer Older
M
Miklos Szeredi 已提交
1 2
/*
  FUSE: Filesystem in Userspace
  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/

#include "fuse_i.h"

#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/module.h>
17
#include <linux/moduleparam.h>
18 19
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
M
Miklos Szeredi 已提交
20
#include <linux/statfs.h>
21
#include <linux/random.h>
A
Alexey Dobriyan 已提交
22
#include <linux/sched.h>
M
Miklos Szeredi 已提交
23
#include <linux/exportfs.h>
S
Seth Forshee 已提交
24
#include <linux/posix_acl.h>
25
#include <linux/pid_namespace.h>
M
Miklos Szeredi 已提交
26 27 28 29 30

/* Module metadata: author, one-line description and licence. */
MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
MODULE_DESCRIPTION("Filesystem in Userspace");
MODULE_LICENSE("GPL");

31
/* Slab cache that fuse_alloc_inode()/fuse_free_inode() use for struct fuse_inode. */
static struct kmem_cache *fuse_inode_cachep;
32 33
/*
 * NOTE(review): presumably the global list of FUSE connections and the mutex
 * guarding it -- neither is referenced in this part of the file; confirm
 * against their users.
 */
struct list_head fuse_conn_list;
DEFINE_MUTEX(fuse_mutex);
M
Miklos Szeredi 已提交
34

35
/* Forward declaration: setter used by the module parameters registered below. */
static int set_global_limit(const char *val, const struct kernel_param *kp);
36

37
/*
 * Upper bound applied to the max_background value of a server that lacks
 * CAP_SYS_ADMIN (see process_init_limits()).  Zero is replaced by a computed
 * default in sanitize_global_limit().
 */
unsigned max_user_bgreq;
38 39 40 41 42 43 44
/*
 * Register max_user_bgreq as a module parameter (mode 0644).  Writes go
 * through set_global_limit(); reads use the stock unsigned-int getter.
 */
module_param_call(max_user_bgreq, set_global_limit, param_get_uint,
		  &max_user_bgreq, 0644);
__MODULE_PARM_TYPE(max_user_bgreq, "uint");
MODULE_PARM_DESC(max_user_bgreq,
 "Global limit for the maximum number of backgrounded requests an "
 "unprivileged user can set");

45
/*
 * Upper bound applied to the congestion_threshold value of a server that
 * lacks CAP_SYS_ADMIN (see process_init_limits()).  Zero is replaced by a
 * computed default in sanitize_global_limit().
 */
unsigned max_user_congthresh;
46 47 48 49 50 51 52
/*
 * Register max_user_congthresh as a module parameter (mode 0644).  Writes go
 * through set_global_limit(); reads use the stock unsigned-int getter.
 */
module_param_call(max_user_congthresh, set_global_limit, param_get_uint,
		  &max_user_congthresh, 0644);
__MODULE_PARM_TYPE(max_user_congthresh, "uint");
MODULE_PARM_DESC(max_user_congthresh,
 "Global limit for the maximum congestion threshold an "
 "unprivileged user can set");

M
Miklos Szeredi 已提交
53 54
/* Value reported in kstatfs.f_type by the statfs code below. */
#define FUSE_SUPER_MAGIC 0x65735546

M
Miklos Szeredi 已提交
55 56
/*
 * Block size treated as the default: fuse_show_options() prints "blksize="
 * for a block-device mount only when s_blocksize differs from this value.
 */
#define FUSE_DEFAULT_BLKSIZE 512

57 58 59 60 61 62
/** Maximum number of outstanding background requests (initial value of fc->max_background) */
#define FUSE_DEFAULT_MAX_BACKGROUND 12

/** Congestion starts at 75% of maximum (integer arithmetic: 12 * 3 / 4 = 9) */
#define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4)

63 64 65 66
/* Forward declaration, present only when CONFIG_BLOCK is set. */
#ifdef CONFIG_BLOCK
static struct file_system_type fuseblk_fs_type;
#endif

67
struct fuse_forget_link *fuse_alloc_forget(void)
68
{
69
	return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL_ACCOUNT);
70 71
}

M
Miklos Szeredi 已提交
72 73 74 75
static struct inode *fuse_alloc_inode(struct super_block *sb)
{
	struct fuse_inode *fi;

Z
zhangliguang 已提交
76 77
	fi = kmem_cache_alloc(fuse_inode_cachep, GFP_KERNEL);
	if (!fi)
M
Miklos Szeredi 已提交
78 79
		return NULL;

M
Miklos Szeredi 已提交
80
	fi->i_time = 0;
81
	fi->inval_mask = 0;
M
Miklos Szeredi 已提交
82
	fi->nodeid = 0;
83
	fi->nlookup = 0;
84
	fi->attr_version = 0;
85
	fi->orig_ino = 0;
86
	fi->state = 0;
87
	mutex_init(&fi->mutex);
88
	init_rwsem(&fi->i_mmap_sem);
89
	spin_lock_init(&fi->lock);
90
	fi->forget = fuse_alloc_forget();
91 92 93 94 95
	if (!fi->forget)
		goto out_free;

	if (IS_ENABLED(CONFIG_FUSE_DAX) && !fuse_dax_inode_alloc(sb, fi))
		goto out_free_forget;
M
Miklos Szeredi 已提交
96

Z
zhangliguang 已提交
97
	return &fi->inode;
98 99 100 101 102 103

out_free_forget:
	kfree(fi->forget);
out_free:
	kmem_cache_free(fuse_inode_cachep, fi);
	return NULL;
M
Miklos Szeredi 已提交
104 105
}

A
Al Viro 已提交
106
static void fuse_free_inode(struct inode *inode)
M
Miklos Szeredi 已提交
107
{
108
	struct fuse_inode *fi = get_fuse_inode(inode);
A
Al Viro 已提交
109

110
	mutex_destroy(&fi->mutex);
111
	kfree(fi->forget);
112 113 114
#ifdef CONFIG_FUSE_DAX
	kfree(fi->dax);
#endif
A
Al Viro 已提交
115
	kmem_cache_free(fuse_inode_cachep, fi);
M
Miklos Szeredi 已提交
116 117
}

118
static void fuse_evict_inode(struct inode *inode)
M
Miklos Szeredi 已提交
119
{
A
Al Viro 已提交
120 121
	struct fuse_inode *fi = get_fuse_inode(inode);

122
	truncate_inode_pages_final(&inode->i_data);
123
	clear_inode(inode);
124
	if (inode->i_sb->s_flags & SB_ACTIVE) {
M
Miklos Szeredi 已提交
125
		struct fuse_conn *fc = get_fuse_conn(inode);
126 127 128

		if (FUSE_IS_DAX(inode))
			fuse_dax_inode_cleanup(inode);
129 130
		fuse_queue_forget(fc, fi->forget, fi->nodeid, fi->nlookup);
		fi->forget = NULL;
131
	}
A
Al Viro 已提交
132 133 134 135
	if (S_ISREG(inode->i_mode) && !is_bad_inode(inode)) {
		WARN_ON(!list_empty(&fi->write_files));
		WARN_ON(!list_empty(&fi->queued_writes));
	}
M
Miklos Szeredi 已提交
136 137
}

138
static int fuse_reconfigure(struct fs_context *fc)
139
{
140 141
	struct super_block *sb = fc->root->d_sb;

142
	sync_filesystem(sb);
143
	if (fc->sb_flags & SB_MANDLOCK)
144 145 146 147 148
		return -EINVAL;

	return 0;
}

149 150 151 152 153 154 155 156 157 158 159 160
/*
 * Fold a 64-bit inode number into an ino_t.
 *
 * Where ino_t is as wide as u64 the value passes through unchanged.  Where
 * it is narrower (32-bit architectures), the bits that do not fit are XORed
 * into the low part.
 */
static ino_t fuse_squash_ino(u64 ino64)
{
	ino_t folded = (ino_t) ino64;

	if (sizeof(ino_t) >= sizeof(u64))
		return folded;

	folded ^= ino64 >> (sizeof(u64) - sizeof(ino_t)) * 8;
	return folded;
}

M
Miklos Szeredi 已提交
161 162
/*
 * Copy the attributes reported by the server into the VFS inode.
 *
 * @inode:      inode to update
 * @attr:       attributes received from the server
 * @attr_valid: stored in fi->i_time
 *
 * The caller must hold fi->lock (asserted through lockdep).  A fresh
 * attr_version is taken from the connection and inval_mask is reset.
 * i_size is not touched here.
 */
void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
				   u64 attr_valid)
{
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_conn *fc = get_fuse_conn(inode);

	lockdep_assert_held(&fi->lock);

	/* FUSE-private bookkeeping. */
	fi->attr_version = atomic64_inc_return(&fc->attr_version);
	fi->i_time = attr_valid;
	WRITE_ONCE(fi->inval_mask, 0);
	fi->orig_ino = attr->ino;

	/* Identity, ownership and usage. */
	inode->i_ino = fuse_squash_ino(attr->ino);
	/* The file-type bits are kept; only the permission bits are replaced. */
	inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
	set_nlink(inode, attr->nlink);
	inode->i_uid = make_kuid(fc->user_ns, attr->uid);
	inode->i_gid = make_kgid(fc->user_ns, attr->gid);
	inode->i_blocks = attr->blocks;

	/* Timestamps. */
	inode->i_atime.tv_sec = attr->atime;
	inode->i_atime.tv_nsec = attr->atimensec;
	/* mtime from server may be stale due to local buffered write */
	if (!(fc->writeback_cache && S_ISREG(inode->i_mode))) {
		inode->i_mtime.tv_sec = attr->mtime;
		inode->i_mtime.tv_nsec = attr->mtimensec;
		inode->i_ctime.tv_sec = attr->ctime;
		inode->i_ctime.tv_nsec = attr->ctimensec;
	}

	/* Fall back to the superblock's block size when the server gives none. */
	inode->i_blkbits = attr->blksize != 0 ?
			   ilog2(attr->blksize) :
			   inode->i_sb->s_blocksize_bits;

	/*
	 * Don't set the sticky bit in i_mode, unless we want the VFS
	 * to check permissions.  This prevents failures due to the
	 * check in may_delete().  The unmodified mode is remembered in
	 * orig_i_mode first.
	 */
	fi->orig_i_mode = inode->i_mode;
	if (!fc->default_permissions)
		inode->i_mode &= ~S_ISVTX;
}

/*
 * Apply server-supplied attributes to @inode, including its size, and
 * invalidate cached pages where needed.
 *
 * @attr_valid:   forwarded to fuse_change_attributes_common()
 * @attr_version: version the attributes were obtained at; when non-zero and
 *                older than fi->attr_version the update is dropped
 *
 * The update is also dropped while FUSE_I_SIZE_UNSTABLE is set in fi->state.
 * Page-cache work happens after fi->lock has been released.
 */
void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
			    u64 attr_valid, u64 attr_version)
{
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_conn *fc = get_fuse_conn(inode);
	bool is_wb = fc->writeback_cache;
	struct timespec64 old_mtime;
	bool inval = false;
	loff_t oldsize;

	spin_lock(&fi->lock);
	if ((attr_version != 0 && fi->attr_version > attr_version) ||
	    test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) {
		spin_unlock(&fi->lock);
		return;
	}

	/* Remember mtime and size as they were before the update. */
	old_mtime = inode->i_mtime;
	fuse_change_attributes_common(inode, attr, attr_valid);
	oldsize = inode->i_size;

	/*
	 * In case of writeback_cache enabled, the cached writes beyond EOF
	 * extend local i_size without keeping userspace server in sync. So,
	 * attr->size coming from server can be stale. We cannot trust it.
	 */
	if (!(is_wb && S_ISREG(inode->i_mode)))
		i_size_write(inode, attr->size);
	spin_unlock(&fi->lock);

	/* Only regular files without writeback cache need page-cache care. */
	if (is_wb || !S_ISREG(inode->i_mode))
		return;

	if (oldsize != attr->size) {
		truncate_pagecache(inode, attr->size);
		inval = !fc->explicit_inval_data;
	} else if (fc->auto_inval_data) {
		struct timespec64 new_mtime = {
			.tv_sec = attr->mtime,
			.tv_nsec = attr->mtimensec,
		};

		/*
		 * Auto inval mode also checks and invalidates if mtime
		 * has changed.
		 */
		inval = !timespec64_equal(&old_mtime, &new_mtime);
	}

	if (inval)
		invalidate_inode_pages2(inode->i_mapping);
}

static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
{
	inode->i_mode = attr->mode & S_IFMT;
M
Miklos Szeredi 已提交
264
	inode->i_size = attr->size;
M
Maxim Patlasov 已提交
265 266
	inode->i_mtime.tv_sec  = attr->mtime;
	inode->i_mtime.tv_nsec = attr->mtimensec;
M
Maxim Patlasov 已提交
267 268
	inode->i_ctime.tv_sec  = attr->ctime;
	inode->i_ctime.tv_nsec = attr->ctimensec;
269 270
	if (S_ISREG(inode->i_mode)) {
		fuse_init_common(inode);
M
Miklos Szeredi 已提交
271
		fuse_init_file_inode(inode);
272 273 274 275 276 277 278 279 280
	} else if (S_ISDIR(inode->i_mode))
		fuse_init_dir(inode);
	else if (S_ISLNK(inode->i_mode))
		fuse_init_symlink(inode);
	else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
		 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
		fuse_init_common(inode);
		init_special_inode(inode, inode->i_mode,
				   new_decode_dev(attr->rdev));
281 282
	} else
		BUG();
M
Miklos Szeredi 已提交
283 284
}

285
static int fuse_inode_eq(struct inode *inode, void *_nodeidp)
M
Miklos Szeredi 已提交
286
{
M
Miklos Szeredi 已提交
287
	u64 nodeid = *(u64 *) _nodeidp;
M
Miklos Szeredi 已提交
288 289 290 291 292 293 294 295
	if (get_node_id(inode) == nodeid)
		return 1;
	else
		return 0;
}

static int fuse_inode_set(struct inode *inode, void *_nodeidp)
{
M
Miklos Szeredi 已提交
296
	u64 nodeid = *(u64 *) _nodeidp;
M
Miklos Szeredi 已提交
297 298 299 300
	get_fuse_inode(inode)->nodeid = nodeid;
	return 0;
}

M
Miklos Szeredi 已提交
301
/*
 * Find or create the inode for @nodeid on @sb and refresh its attributes.
 *
 * A freshly created inode is initialised from @attr and @generation.  An
 * existing inode whose file type differs from @attr is marked bad and
 * released, and the lookup is repeated.
 *
 * On success fi->nlookup has been incremented and the attributes applied
 * through fuse_change_attributes().  Returns NULL if iget5_locked() fails.
 */
struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
			int generation, struct fuse_attr *attr,
			u64 attr_valid, u64 attr_version)
{
	struct fuse_conn *fc = get_fuse_conn_super(sb);
	struct fuse_inode *fi;
	struct inode *inode;

	for (;;) {
		inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set,
				     &nodeid);
		if (!inode)
			return NULL;

		if (inode->i_state & I_NEW) {
			inode->i_flags |= S_NOATIME;
			if (!fc->writeback_cache || !S_ISREG(attr->mode))
				inode->i_flags |= S_NOCMTIME;
			inode->i_generation = generation;
			fuse_init_inode(inode, attr);
			unlock_new_inode(inode);
			break;
		}

		/* Existing inode of the expected type: use it as is. */
		if (!((inode->i_mode ^ attr->mode) & S_IFMT))
			break;

		/* Inode has changed type, any I/O on the old should fail */
		make_bad_inode(inode);
		iput(inode);
	}

	fi = get_fuse_inode(inode);
	spin_lock(&fi->lock);
	fi->nlookup++;
	spin_unlock(&fi->lock);

	fuse_change_attributes(inode, attr, attr_valid, attr_version);
	return inode;
}

337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359
/*
 * Search every mount of connection @fc for an in-core inode with @nodeid.
 *
 * @fm: optional; when non-NULL it receives the mount the inode was found on
 *
 * Mounts without a superblock are skipped.  A warning is emitted if
 * fc->killsb is not locked.  Returns the inode obtained from ilookup5(), or
 * NULL when no mount has it.
 * NOTE(review): fuse_reverse_inval_inode() releases the result with iput(),
 * so the caller presumably owns a reference -- confirm.
 */
struct inode *fuse_ilookup(struct fuse_conn *fc, u64 nodeid,
			   struct fuse_mount **fm)
{
	struct fuse_mount *mnt;

	WARN_ON(!rwsem_is_locked(&fc->killsb));

	list_for_each_entry(mnt, &fc->mounts, fc_entry) {
		struct inode *found;

		if (!mnt->sb)
			continue;

		found = ilookup5(mnt->sb, nodeid, fuse_inode_eq, &nodeid);
		if (!found)
			continue;

		if (fm)
			*fm = mnt;
		return found;
	}

	return NULL;
}

/*
 * Invalidate cached state of the inode identified by @nodeid.
 *
 * @offset: start of the byte range whose pages are invalidated; a negative
 *          value skips page invalidation altogether
 * @len:    length of the range; zero or negative means "up to the last page"
 *
 * The inode gets a fresh attr_version, its attributes are invalidated and
 * its cached ACLs are forgotten.  Returns -ENOENT if no such inode is
 * cached, 0 otherwise (the result of the page invalidation is not reported).
 */
int fuse_reverse_inval_inode(struct fuse_conn *fc, u64 nodeid,
			     loff_t offset, loff_t len)
{
	struct inode *inode = fuse_ilookup(fc, nodeid, NULL);
	struct fuse_inode *fi;

	if (!inode)
		return -ENOENT;

	fi = get_fuse_inode(inode);
	spin_lock(&fi->lock);
	fi->attr_version = atomic64_inc_return(&fc->attr_version);
	spin_unlock(&fi->lock);

	fuse_invalidate_attr(inode);
	forget_all_cached_acls(inode);

	if (offset >= 0) {
		pgoff_t pg_start = offset >> PAGE_SHIFT;
		pgoff_t pg_end = -1;

		if (len > 0)
			pg_end = (offset + len - 1) >> PAGE_SHIFT;

		invalidate_inode_pages2_range(inode->i_mapping,
					      pg_start, pg_end);
	}

	iput(inode);
	return 0;
}

391
bool fuse_lock_inode(struct inode *inode)
392
{
393 394 395
	bool locked = false;

	if (!get_fuse_conn(inode)->parallel_dirops) {
396
		mutex_lock(&get_fuse_inode(inode)->mutex);
397 398 399 400
		locked = true;
	}

	return locked;
401 402
}

403
/*
 * Counterpart of fuse_lock_inode(): release the per-inode mutex if @locked
 * says it was taken.
 */
void fuse_unlock_inode(struct inode *inode, bool locked)
{
	if (!locked)
		return;

	mutex_unlock(&get_fuse_inode(inode)->mutex);
}

409
static void fuse_umount_begin(struct super_block *sb)
410
{
411 412 413 414
	struct fuse_conn *fc = get_fuse_conn_super(sb);

	if (!fc->no_force_umount)
		fuse_abort_conn(fc);
415 416
}

417
static void fuse_send_destroy(struct fuse_mount *fm)
418
{
419
	if (fm->fc->conn_init) {
420 421 422 423 424
		FUSE_ARGS(args);

		args.opcode = FUSE_DESTROY;
		args.force = true;
		args.nocreds = true;
425
		fuse_simple_request(fm, &args);
426 427 428
	}
}

429 430
/* put_super handler: drop the superblock's reference on its fuse_mount. */
static void fuse_put_super(struct super_block *sb)
{
	fuse_mount_put(get_fuse_mount_super(sb));
}

436 437 438 439
/*
 * Translate the server's statfs reply @attr into the kernel's kstatfs
 * @stbuf.  f_type is always FUSE_SUPER_MAGIC; f_fsid is not written.
 */
static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr)
{
	stbuf->f_type = FUSE_SUPER_MAGIC;

	/* Block geometry and usage. */
	stbuf->f_bsize = attr->bsize;
	stbuf->f_frsize = attr->frsize;
	stbuf->f_blocks = attr->blocks;
	stbuf->f_bfree = attr->bfree;
	stbuf->f_bavail = attr->bavail;

	/* Inode counts and name length limit. */
	stbuf->f_files = attr->files;
	stbuf->f_ffree = attr->ffree;
	stbuf->f_namelen = attr->namelen;
	/* fsid is left zero */
}

450
static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
451
{
452
	struct super_block *sb = dentry->d_sb;
453
	struct fuse_mount *fm = get_fuse_mount_super(sb);
454
	FUSE_ARGS(args);
455 456 457
	struct fuse_statfs_out outarg;
	int err;

458
	if (!fuse_allow_current_process(fm->fc)) {
M
Miklos Szeredi 已提交
459 460 461 462
		buf->f_type = FUSE_SUPER_MAGIC;
		return 0;
	}

463
	memset(&outarg, 0, sizeof(outarg));
464 465 466 467 468 469
	args.in_numargs = 0;
	args.opcode = FUSE_STATFS;
	args.nodeid = get_node_id(d_inode(dentry));
	args.out_numargs = 1;
	args.out_args[0].size = sizeof(outarg);
	args.out_args[0].value = &outarg;
470
	err = fuse_simple_request(fm, &args);
471 472 473 474 475
	if (!err)
		convert_fuse_statfs(buf, &outarg.st);
	return err;
}

M
Miklos Szeredi 已提交
476
/* Identifiers of the mount options handled by fuse_parse_param(). */
enum {
	OPT_SOURCE,			/* "source" */
	OPT_SUBTYPE,			/* "subtype" */
	OPT_FD,				/* "fd" */
	OPT_ROOTMODE,			/* "rootmode" */
	OPT_USER_ID,			/* "user_id" */
	OPT_GROUP_ID,			/* "group_id" */
	OPT_DEFAULT_PERMISSIONS,	/* "default_permissions" */
	OPT_ALLOW_OTHER,		/* "allow_other" */
	OPT_MAX_READ,			/* "max_read" */
	OPT_BLKSIZE,			/* "blksize" */
	OPT_ERR				/* has no entry in fuse_fs_parameters */
};

490
static const struct fs_parameter_spec fuse_fs_parameters[] = {
491 492 493 494 495 496 497 498 499
	fsparam_string	("source",		OPT_SOURCE),
	fsparam_u32	("fd",			OPT_FD),
	fsparam_u32oct	("rootmode",		OPT_ROOTMODE),
	fsparam_u32	("user_id",		OPT_USER_ID),
	fsparam_u32	("group_id",		OPT_GROUP_ID),
	fsparam_flag	("default_permissions",	OPT_DEFAULT_PERMISSIONS),
	fsparam_flag	("allow_other",		OPT_ALLOW_OTHER),
	fsparam_u32	("max_read",		OPT_MAX_READ),
	fsparam_u32	("blksize",		OPT_BLKSIZE),
500
	fsparam_string	("subtype",		OPT_SUBTYPE),
501 502 503 504
	{}
};

static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param)
505
{
506 507 508 509
	struct fs_parse_result result;
	struct fuse_fs_context *ctx = fc->fs_private;
	int opt;

510 511 512 513 514 515 516 517 518 519
	if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
		/*
		 * Ignore options coming from mount(MS_REMOUNT) for backward
		 * compatibility.
		 */
		if (fc->oldapi)
			return 0;

		return invalfc(fc, "No changes allowed in reconfigure");
	}
520

521
	opt = fs_parse(fc, fuse_fs_parameters, param, &result);
522 523 524 525 526 527
	if (opt < 0)
		return opt;

	switch (opt) {
	case OPT_SOURCE:
		if (fc->source)
A
Al Viro 已提交
528
			return invalfc(fc, "Multiple sources specified");
529 530 531 532 533 534
		fc->source = param->string;
		param->string = NULL;
		break;

	case OPT_SUBTYPE:
		if (ctx->subtype)
A
Al Viro 已提交
535
			return invalfc(fc, "Multiple subtypes specified");
536 537 538 539 540 541
		ctx->subtype = param->string;
		param->string = NULL;
		return 0;

	case OPT_FD:
		ctx->fd = result.uint_32;
542
		ctx->fd_present = true;
543 544 545 546
		break;

	case OPT_ROOTMODE:
		if (!fuse_valid_type(result.uint_32))
A
Al Viro 已提交
547
			return invalfc(fc, "Invalid rootmode");
548
		ctx->rootmode = result.uint_32;
549
		ctx->rootmode_present = true;
550 551 552 553 554
		break;

	case OPT_USER_ID:
		ctx->user_id = make_kuid(fc->user_ns, result.uint_32);
		if (!uid_valid(ctx->user_id))
A
Al Viro 已提交
555
			return invalfc(fc, "Invalid user_id");
556
		ctx->user_id_present = true;
557 558 559 560 561
		break;

	case OPT_GROUP_ID:
		ctx->group_id = make_kgid(fc->user_ns, result.uint_32);
		if (!gid_valid(ctx->group_id))
A
Al Viro 已提交
562
			return invalfc(fc, "Invalid group_id");
563
		ctx->group_id_present = true;
564 565 566
		break;

	case OPT_DEFAULT_PERMISSIONS:
567
		ctx->default_permissions = true;
568 569 570
		break;

	case OPT_ALLOW_OTHER:
571
		ctx->allow_other = true;
572 573 574 575 576 577 578 579
		break;

	case OPT_MAX_READ:
		ctx->max_read = result.uint_32;
		break;

	case OPT_BLKSIZE:
		if (!ctx->is_bdev)
A
Al Viro 已提交
580
			return invalfc(fc, "blksize only supported for fuseblk");
581 582 583 584 585
		ctx->blksize = result.uint_32;
		break;

	default:
		return -EINVAL;
586
	}
587 588

	return 0;
589 590
}

591
static void fuse_free_fc(struct fs_context *fc)
M
Miklos Szeredi 已提交
592
{
593
	struct fuse_fs_context *ctx = fc->fs_private;
594

595 596 597 598
	if (ctx) {
		kfree(ctx->subtype);
		kfree(ctx);
	}
M
Miklos Szeredi 已提交
599 600
}

601
static int fuse_show_options(struct seq_file *m, struct dentry *root)
M
Miklos Szeredi 已提交
602
{
603 604
	struct super_block *sb = root->d_sb;
	struct fuse_conn *fc = get_fuse_conn_super(sb);
M
Miklos Szeredi 已提交
605

606 607 608 609 610 611 612 613 614 615 616 617 618 619
	if (fc->legacy_opts_show) {
		seq_printf(m, ",user_id=%u",
			   from_kuid_munged(fc->user_ns, fc->user_id));
		seq_printf(m, ",group_id=%u",
			   from_kgid_munged(fc->user_ns, fc->group_id));
		if (fc->default_permissions)
			seq_puts(m, ",default_permissions");
		if (fc->allow_other)
			seq_puts(m, ",allow_other");
		if (fc->max_read != ~0)
			seq_printf(m, ",max_read=%u", fc->max_read);
		if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE)
			seq_printf(m, ",blksize=%lu", sb->s_blocksize);
	}
620 621 622 623 624
#ifdef CONFIG_FUSE_DAX
	if (fc->dax)
		seq_puts(m, ",dax");
#endif

M
Miklos Szeredi 已提交
625 626 627
	return 0;
}

628 629 630
static void fuse_iqueue_init(struct fuse_iqueue *fiq,
			     const struct fuse_iqueue_ops *ops,
			     void *priv)
M
Miklos Szeredi 已提交
631 632
{
	memset(fiq, 0, sizeof(struct fuse_iqueue));
633
	spin_lock_init(&fiq->lock);
M
Miklos Szeredi 已提交
634 635 636 637
	init_waitqueue_head(&fiq->waitq);
	INIT_LIST_HEAD(&fiq->pending);
	INIT_LIST_HEAD(&fiq->interrupts);
	fiq->forget_list_tail = &fiq->forget_list_head;
638
	fiq->connected = 1;
639 640
	fiq->ops = ops;
	fiq->priv = priv;
M
Miklos Szeredi 已提交
641 642
}

643 644
static void fuse_pqueue_init(struct fuse_pqueue *fpq)
{
645 646
	unsigned int i;

M
Miklos Szeredi 已提交
647
	spin_lock_init(&fpq->lock);
648 649
	for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
		INIT_LIST_HEAD(&fpq->processing[i]);
650
	INIT_LIST_HEAD(&fpq->io);
651
	fpq->connected = 1;
652 653
}

654 655
void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
		    struct user_namespace *user_ns,
656
		    const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv)
M
Miklos Szeredi 已提交
657
{
658 659
	memset(fc, 0, sizeof(*fc));
	spin_lock_init(&fc->lock);
K
Kirill Tkhai 已提交
660
	spin_lock_init(&fc->bg_lock);
J
John Muir 已提交
661
	init_rwsem(&fc->killsb);
662
	refcount_set(&fc->count, 1);
663
	atomic_set(&fc->dev_count, 1);
664
	init_waitqueue_head(&fc->blocked_waitq);
665
	fuse_iqueue_init(&fc->iq, fiq_ops, fiq_priv);
666 667
	INIT_LIST_HEAD(&fc->bg_queue);
	INIT_LIST_HEAD(&fc->entry);
668
	INIT_LIST_HEAD(&fc->devices);
669
	atomic_set(&fc->num_waiting, 0);
670 671
	fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND;
	fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD;
M
Miklos Szeredi 已提交
672
	atomic64_set(&fc->khctr, 0);
673
	fc->polled_files = RB_ROOT;
674
	fc->blocked = 0;
M
Maxim Patlasov 已提交
675
	fc->initialized = 0;
676
	fc->connected = 1;
677
	atomic64_set(&fc->attr_version, 1);
678
	get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
679
	fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
680
	fc->user_ns = get_user_ns(user_ns);
M
Miklos Szeredi 已提交
681
	fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
682 683 684 685 686

	INIT_LIST_HEAD(&fc->mounts);
	list_add(&fm->fc_entry, &fc->mounts);
	fm->fc = fc;
	refcount_set(&fm->count, 1);
M
Miklos Szeredi 已提交
687
}
688
EXPORT_SYMBOL_GPL(fuse_conn_init);
M
Miklos Szeredi 已提交
689

690 691
void fuse_conn_put(struct fuse_conn *fc)
{
692
	if (refcount_dec_and_test(&fc->count)) {
693 694
		struct fuse_iqueue *fiq = &fc->iq;

695 696
		if (IS_ENABLED(CONFIG_FUSE_DAX))
			fuse_dax_conn_free(fc);
697 698
		if (fiq->ops->release)
			fiq->ops->release(fiq);
699
		put_pid_ns(fc->pid_ns);
700
		put_user_ns(fc->user_ns);
T
Tejun Heo 已提交
701
		fc->release(fc);
702
	}
703
}
704
EXPORT_SYMBOL_GPL(fuse_conn_put);
705 706 707

struct fuse_conn *fuse_conn_get(struct fuse_conn *fc)
{
708
	refcount_inc(&fc->count);
709 710
	return fc;
}
711
EXPORT_SYMBOL_GPL(fuse_conn_get);
712

713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729
/*
 * Drop a reference on @fm.  The final put also drops the mount's reference
 * on its connection (if one is attached) and frees @fm.
 */
void fuse_mount_put(struct fuse_mount *fm)
{
	if (!refcount_dec_and_test(&fm->count))
		return;

	if (fm->fc)
		fuse_conn_put(fm->fc);
	kfree(fm);
}
EXPORT_SYMBOL_GPL(fuse_mount_put);

/* Take an additional reference on @fm and return @fm for convenience. */
struct fuse_mount *fuse_mount_get(struct fuse_mount *fm)
{
	refcount_inc(&fm->count);

	return fm;
}
EXPORT_SYMBOL_GPL(fuse_mount_get);

730
static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode)
M
Miklos Szeredi 已提交
731 732 733 734 735 736
{
	struct fuse_attr attr;
	memset(&attr, 0, sizeof(attr));

	attr.mode = mode;
	attr.ino = FUSE_ROOT_ID;
737
	attr.nlink = 1;
738
	return fuse_iget(sb, 1, 0, &attr, 0, 0);
M
Miklos Szeredi 已提交
739 740
}

M
Miklos Szeredi 已提交
741
struct fuse_inode_handle {
M
Miklos Szeredi 已提交
742 743 744 745 746 747 748
	u64 nodeid;
	u32 generation;
};

static struct dentry *fuse_get_dentry(struct super_block *sb,
				      struct fuse_inode_handle *handle)
{
749
	struct fuse_conn *fc = get_fuse_conn_super(sb);
M
Miklos Szeredi 已提交
750 751 752 753 754 755 756 757
	struct inode *inode;
	struct dentry *entry;
	int err = -ESTALE;

	if (handle->nodeid == 0)
		goto out_err;

	inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid);
758 759
	if (!inode) {
		struct fuse_entry_out outarg;
A
Al Viro 已提交
760
		const struct qstr name = QSTR_INIT(".", 1);
761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776

		if (!fc->export_support)
			goto out_err;

		err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg,
				       &inode);
		if (err && err != -ENOENT)
			goto out_err;
		if (err || !inode) {
			err = -ESTALE;
			goto out_err;
		}
		err = -EIO;
		if (get_node_id(inode) != handle->nodeid)
			goto out_iput;
	}
M
Miklos Szeredi 已提交
777 778 779 780
	err = -ESTALE;
	if (inode->i_generation != handle->generation)
		goto out_iput;

781
	entry = d_obtain_alias(inode);
A
Al Viro 已提交
782
	if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID)
M
Miklos Szeredi 已提交
783 784 785 786 787 788 789 790 791 792
		fuse_invalidate_entry_cache(entry);

	return entry;

 out_iput:
	iput(inode);
 out_err:
	return ERR_PTR(err);
}

A
Al Viro 已提交
793 794
/*
 * Encode a file handle for @inode, optionally followed by one for @parent.
 *
 * Layout in 32-bit words: nodeid high, nodeid low, generation -- three words
 * for the inode, then three more for the parent when @parent is given.
 *
 * @max_len: in: capacity of @fh in words; out: number of words needed/used
 *
 * Returns 0x81 (inode only) or 0x82 (inode and parent), or FILEID_INVALID
 * when @fh is too small; *max_len then holds the required size.
 */
static int fuse_encode_fh(struct inode *inode, u32 *fh, int *max_len,
			   struct inode *parent)
{
	const int words = parent ? 6 : 3;
	u64 id;

	if (*max_len < words) {
		*max_len = words;
		return FILEID_INVALID;
	}

	id = get_fuse_inode(inode)->nodeid;
	fh[0] = (u32)(id >> 32);
	fh[1] = (u32)(id & 0xffffffff);
	fh[2] = inode->i_generation;

	if (parent) {
		id = get_fuse_inode(parent)->nodeid;
		fh[3] = (u32)(id >> 32);
		fh[4] = (u32)(id & 0xffffffff);
		fh[5] = parent->i_generation;
	}

	*max_len = words;
	return parent ? 0x82 : 0x81;
}

static struct dentry *fuse_fh_to_dentry(struct super_block *sb,
		struct fid *fid, int fh_len, int fh_type)
{
	struct fuse_inode_handle handle;

	if ((fh_type != 0x81 && fh_type != 0x82) || fh_len < 3)
		return NULL;

	handle.nodeid = (u64) fid->raw[0] << 32;
	handle.nodeid |= (u64) fid->raw[1];
	handle.generation = fid->raw[2];
	return fuse_get_dentry(sb, &handle);
}

static struct dentry *fuse_fh_to_parent(struct super_block *sb,
		struct fid *fid, int fh_len, int fh_type)
{
	struct fuse_inode_handle parent;

	if (fh_type != 0x82 || fh_len < 6)
		return NULL;

	parent.nodeid = (u64) fid->raw[3] << 32;
	parent.nodeid |= (u64) fid->raw[4];
	parent.generation = fid->raw[5];
	return fuse_get_dentry(sb, &parent);
}

853 854
static struct dentry *fuse_get_parent(struct dentry *child)
{
855
	struct inode *child_inode = d_inode(child);
856 857 858 859
	struct fuse_conn *fc = get_fuse_conn(child_inode);
	struct inode *inode;
	struct dentry *parent;
	struct fuse_entry_out outarg;
A
Al Viro 已提交
860
	const struct qstr name = QSTR_INIT("..", 2);
861 862 863 864 865 866 867
	int err;

	if (!fc->export_support)
		return ERR_PTR(-ESTALE);

	err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode),
			       &name, &outarg, &inode);
868 869 870
	if (err) {
		if (err == -ENOENT)
			return ERR_PTR(-ESTALE);
871 872
		return ERR_PTR(err);
	}
873 874

	parent = d_obtain_alias(inode);
A
Al Viro 已提交
875
	if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID)
876 877 878 879
		fuse_invalidate_entry_cache(parent);

	return parent;
}
M
Miklos Szeredi 已提交
880 881 882 883 884

static const struct export_operations fuse_export_operations = {
	.fh_to_dentry	= fuse_fh_to_dentry,
	.fh_to_parent	= fuse_fh_to_parent,
	.encode_fh	= fuse_encode_fh,
885
	.get_parent	= fuse_get_parent,
M
Miklos Szeredi 已提交
886 887
};

888
static const struct super_operations fuse_super_operations = {
M
Miklos Szeredi 已提交
889
	.alloc_inode    = fuse_alloc_inode,
A
Al Viro 已提交
890
	.free_inode     = fuse_free_inode,
891
	.evict_inode	= fuse_evict_inode,
M
Miklos Szeredi 已提交
892
	.write_inode	= fuse_write_inode,
M
Miklos Szeredi 已提交
893
	.drop_inode	= generic_delete_inode,
M
Miklos Szeredi 已提交
894
	.put_super	= fuse_put_super,
895
	.umount_begin	= fuse_umount_begin,
896
	.statfs		= fuse_statfs,
M
Miklos Szeredi 已提交
897 898 899
	.show_options	= fuse_show_options,
};

900 901
static void sanitize_global_limit(unsigned *limit)
{
M
Miklos Szeredi 已提交
902 903 904 905
	/*
	 * The default maximum number of async requests is calculated to consume
	 * 1/2^13 of the total memory, assuming 392 bytes per request.
	 */
906
	if (*limit == 0)
M
Miklos Szeredi 已提交
907
		*limit = ((totalram_pages() << PAGE_SHIFT) >> 13) / 392;
908 909 910 911 912

	if (*limit >= 1 << 16)
		*limit = (1 << 16) - 1;
}

913
static int set_global_limit(const char *val, const struct kernel_param *kp)
914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935
{
	int rv;

	rv = param_set_uint(val, kp);
	if (rv)
		return rv;

	sanitize_global_limit((unsigned *)kp->arg);

	return 0;
}

static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg)
{
	int cap_sys_admin = capable(CAP_SYS_ADMIN);

	if (arg->minor < 13)
		return;

	sanitize_global_limit(&max_user_bgreq);
	sanitize_global_limit(&max_user_congthresh);

K
Kirill Tkhai 已提交
936
	spin_lock(&fc->bg_lock);
937 938 939 940 941 942 943 944 945 946 947 948 949
	if (arg->max_background) {
		fc->max_background = arg->max_background;

		if (!cap_sys_admin && fc->max_background > max_user_bgreq)
			fc->max_background = max_user_bgreq;
	}
	if (arg->congestion_threshold) {
		fc->congestion_threshold = arg->congestion_threshold;

		if (!cap_sys_admin &&
		    fc->congestion_threshold > max_user_congthresh)
			fc->congestion_threshold = max_user_congthresh;
	}
K
Kirill Tkhai 已提交
950
	spin_unlock(&fc->bg_lock);
951 952
}

953 954 955 956 957 958
/*
 * Everything a FUSE_INIT exchange needs, in one allocation: the request
 * descriptor plus its input and output payloads.  process_init_reply()
 * recovers the whole structure from the embedded args via container_of()
 * and frees it.
 */
struct fuse_init_args {
	struct fuse_args args;
	struct fuse_init_in in;
	struct fuse_init_out out;
};

959
static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
960
			       int error)
961
{
962
	struct fuse_conn *fc = fm->fc;
963 964
	struct fuse_init_args *ia = container_of(args, typeof(*ia), args);
	struct fuse_init_out *arg = &ia->out;
965
	bool ok = true;
966

967
	if (error || arg->major != FUSE_KERNEL_VERSION)
968
		ok = false;
969
	else {
970 971
		unsigned long ra_pages;

972 973
		process_init_limits(fc, arg);

974
		if (arg->minor >= 6) {
975
			ra_pages = arg->max_readahead / PAGE_SIZE;
976 977
			if (arg->flags & FUSE_ASYNC_READ)
				fc->async_read = 1;
978 979
			if (!(arg->flags & FUSE_POSIX_LOCKS))
				fc->no_lock = 1;
M
Miklos Szeredi 已提交
980 981 982
			if (arg->minor >= 17) {
				if (!(arg->flags & FUSE_FLOCK_LOCKS))
					fc->no_flock = 1;
M
Miklos Szeredi 已提交
983 984 985
			} else {
				if (!(arg->flags & FUSE_POSIX_LOCKS))
					fc->no_flock = 1;
M
Miklos Szeredi 已提交
986
			}
987 988
			if (arg->flags & FUSE_ATOMIC_O_TRUNC)
				fc->atomic_o_trunc = 1;
989 990 991 992 993
			if (arg->minor >= 9) {
				/* LOOKUP has dependency on proto version */
				if (arg->flags & FUSE_EXPORT_SUPPORT)
					fc->export_support = 1;
			}
994 995
			if (arg->flags & FUSE_BIG_WRITES)
				fc->big_writes = 1;
996 997
			if (arg->flags & FUSE_DONT_MASK)
				fc->dont_mask = 1;
998 999
			if (arg->flags & FUSE_AUTO_INVAL_DATA)
				fc->auto_inval_data = 1;
1000 1001
			else if (arg->flags & FUSE_EXPLICIT_INVAL_DATA)
				fc->explicit_inval_data = 1;
1002
			if (arg->flags & FUSE_DO_READDIRPLUS) {
1003
				fc->do_readdirplus = 1;
1004 1005 1006
				if (arg->flags & FUSE_READDIRPLUS_AUTO)
					fc->readdirplus_auto = 1;
			}
1007 1008
			if (arg->flags & FUSE_ASYNC_DIO)
				fc->async_dio = 1;
P
Pavel Emelyanov 已提交
1009 1010
			if (arg->flags & FUSE_WRITEBACK_CACHE)
				fc->writeback_cache = 1;
1011 1012
			if (arg->flags & FUSE_PARALLEL_DIROPS)
				fc->parallel_dirops = 1;
1013 1014
			if (arg->flags & FUSE_HANDLE_KILLPRIV)
				fc->handle_killpriv = 1;
1015
			if (arg->time_gran && arg->time_gran <= 1000000000)
1016
				fm->sb->s_time_gran = arg->time_gran;
S
Seth Forshee 已提交
1017
			if ((arg->flags & FUSE_POSIX_ACL)) {
M
Miklos Szeredi 已提交
1018
				fc->default_permissions = 1;
S
Seth Forshee 已提交
1019
				fc->posix_acl = 1;
1020
				fm->sb->s_xattr = fuse_acl_xattr_handlers;
S
Seth Forshee 已提交
1021
			}
D
Dan Schatzberg 已提交
1022 1023
			if (arg->flags & FUSE_CACHE_SYMLINKS)
				fc->cache_symlinks = 1;
1024 1025
			if (arg->flags & FUSE_ABORT_ERROR)
				fc->abort_err = 1;
1026 1027 1028 1029 1030
			if (arg->flags & FUSE_MAX_PAGES) {
				fc->max_pages =
					min_t(unsigned int, FUSE_MAX_MAX_PAGES,
					max_t(unsigned int, arg->max_pages, 1));
			}
1031 1032 1033 1034 1035
			if (IS_ENABLED(CONFIG_FUSE_DAX) &&
			    arg->flags & FUSE_MAP_ALIGNMENT &&
			    !fuse_dax_check_alignment(fc, arg->map_alignment)) {
				ok = false;
			}
1036
		} else {
1037
			ra_pages = fc->max_read / PAGE_SIZE;
1038
			fc->no_lock = 1;
M
Miklos Szeredi 已提交
1039
			fc->no_flock = 1;
1040
		}
1041

1042 1043
		fm->sb->s_bdi->ra_pages =
				min(fm->sb->s_bdi->ra_pages, ra_pages);
1044 1045
		fc->minor = arg->minor;
		fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
1046
		fc->max_write = max_t(unsigned, 4096, fc->max_write);
1047
		fc->conn_init = 1;
1048
	}
1049 1050
	kfree(ia);

1051 1052 1053 1054 1055
	if (!ok) {
		fc->conn_init = 0;
		fc->conn_error = 1;
	}

1056
	fuse_set_initialized(fc);
1057
	wake_up_all(&fc->blocked_waitq);
1058 1059
}

1060
void fuse_send_init(struct fuse_mount *fm)
1061
{
1062
	struct fuse_init_args *ia;
M
Miklos Szeredi 已提交
1063

1064 1065 1066 1067
	ia = kzalloc(sizeof(*ia), GFP_KERNEL | __GFP_NOFAIL);

	ia->in.major = FUSE_KERNEL_VERSION;
	ia->in.minor = FUSE_KERNEL_MINOR_VERSION;
1068
	ia->in.max_readahead = fm->sb->s_bdi->ra_pages * PAGE_SIZE;
1069 1070
	ia->in.flags |=
		FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
M
Miklos Szeredi 已提交
1071
		FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
M
Miklos Szeredi 已提交
1072
		FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
1073
		FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
P
Pavel Emelyanov 已提交
1074
		FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
1075
		FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
1076
		FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |
1077
		FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS |
1078
		FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA;
1079
#ifdef CONFIG_FUSE_DAX
1080
	if (fm->fc->dax)
1081 1082
		ia->in.flags |= FUSE_MAP_ALIGNMENT;
#endif
1083 1084 1085 1086 1087
	ia->args.opcode = FUSE_INIT;
	ia->args.in_numargs = 1;
	ia->args.in_args[0].size = sizeof(ia->in);
	ia->args.in_args[0].value = &ia->in;
	ia->args.out_numargs = 1;
D
Daniel Mack 已提交
1088
	/* Variable length argument used for backward compatibility
1089 1090
	   with interface version < 7.5.  Rest of init_out is zeroed
	   by do_get_request(), so a short reply is not a problem */
1091
	ia->args.out_argvar = true;
1092 1093 1094 1095 1096 1097
	ia->args.out_args[0].size = sizeof(ia->out);
	ia->args.out_args[0].value = &ia->out;
	ia->args.force = true;
	ia->args.nocreds = true;
	ia->args.end = process_init_reply;

1098 1099
	if (fuse_simple_background(fm, &ia->args, GFP_KERNEL) != 0)
		process_init_reply(fm, &ia->args, -ENOTCONN);
1100
}
1101
EXPORT_SYMBOL_GPL(fuse_send_init);
1102

1103
void fuse_free_conn(struct fuse_conn *fc)
T
Tejun Heo 已提交
1104
{
1105
	WARN_ON(!list_empty(&fc->devices));
A
Al Viro 已提交
1106
	kfree_rcu(fc, rcu);
T
Tejun Heo 已提交
1107
}
1108
EXPORT_SYMBOL_GPL(fuse_free_conn);
T
Tejun Heo 已提交
1109

1110 1111 1112
static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
{
	int err;
1113
	char *suffix = "";
1114

1115
	if (sb->s_bdev) {
1116
		suffix = "-fuseblk";
1117 1118 1119 1120 1121 1122 1123
		/*
		 * sb->s_bdi points to blkdev's bdi however we want to redirect
		 * it to our private bdi...
		 */
		bdi_put(sb->s_bdi);
		sb->s_bdi = &noop_backing_dev_info;
	}
1124 1125
	err = super_setup_bdi_name(sb, "%u:%u%s", MAJOR(fc->dev),
				   MINOR(fc->dev), suffix);
1126 1127 1128
	if (err)
		return err;

1129
	sb->s_bdi->ra_pages = VM_READAHEAD_PAGES;
1130 1131
	/* fuse does it's own writeback accounting */
	sb->s_bdi->capabilities = BDI_CAP_NO_ACCT_WB | BDI_CAP_STRICTLIMIT;
1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144

	/*
	 * For a single fuse filesystem use max 1% of dirty +
	 * writeback threshold.
	 *
	 * This gives about 1M of write buffer for memory maps on a
	 * machine with 1G and 10% dirty_ratio, which should be more
	 * than enough.
	 *
	 * Privileged users can raise it by writing to
	 *
	 *    /sys/class/bdi/<bdi>/max_ratio
	 */
1145
	bdi_set_max_ratio(sb->s_bdi, 1);
1146 1147 1148 1149

	return 0;
}

1150
struct fuse_dev *fuse_dev_alloc(void)
1151 1152
{
	struct fuse_dev *fud;
1153
	struct list_head *pq;
1154 1155

	fud = kzalloc(sizeof(struct fuse_dev), GFP_KERNEL);
1156 1157
	if (!fud)
		return NULL;
1158

1159 1160 1161 1162
	pq = kcalloc(FUSE_PQ_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL);
	if (!pq) {
		kfree(fud);
		return NULL;
1163 1164
	}

1165 1166 1167
	fud->pq.processing = pq;
	fuse_pqueue_init(&fud->pq);

1168 1169 1170 1171 1172 1173 1174
	return fud;
}
EXPORT_SYMBOL_GPL(fuse_dev_alloc);

/*
 * Attach @fud to connection @fc: store the pointer returned by
 * fuse_conn_get() in the device and append the device to fc->devices
 * under fc->lock.
 */
void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc)
{
	struct fuse_conn *conn_ref = fuse_conn_get(fc);

	fud->fc = conn_ref;

	spin_lock(&fc->lock);
	list_add_tail(&fud->entry, &fc->devices);
	spin_unlock(&fc->lock);
}
EXPORT_SYMBOL_GPL(fuse_dev_install);
1180

1181 1182 1183 1184 1185 1186 1187 1188 1189
/*
 * Convenience wrapper: allocate a fuse_dev and, if that succeeded,
 * install it on @fc.  Returns the device or NULL on allocation failure.
 */
struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc)
{
	struct fuse_dev *fud = fuse_dev_alloc();

	if (fud)
		fuse_dev_install(fud, fc);

	return fud;
}
1192
EXPORT_SYMBOL_GPL(fuse_dev_alloc_install);
1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204

/*
 * Release a fuse_dev.  If the device is attached to a connection it is
 * first unlinked from the connection's device list (under the
 * connection lock) and fuse_conn_put() is called on that connection.
 * The processing hash table and the device itself are then freed.
 */
void fuse_dev_free(struct fuse_dev *fud)
{
	struct fuse_conn *conn = fud->fc;

	/* Never installed on a connection: only memory to release */
	if (!conn)
		goto release;

	spin_lock(&conn->lock);
	list_del(&fud->entry);
	spin_unlock(&conn->lock);

	fuse_conn_put(conn);

 release:
	kfree(fud->pq.processing);
	kfree(fud);
}
EXPORT_SYMBOL_GPL(fuse_dev_free);

1210
int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
M
Miklos Szeredi 已提交
1211
{
1212
	struct fuse_dev *fud = NULL;
1213 1214
	struct fuse_mount *fm = get_fuse_mount_super(sb);
	struct fuse_conn *fc = fm->fc;
M
Miklos Szeredi 已提交
1215
	struct inode *root;
1216
	struct dentry *root_dentry;
M
Miklos Szeredi 已提交
1217 1218
	int err;

1219
	err = -EINVAL;
1220
	if (sb->s_flags & SB_MANDLOCK)
1221
		goto err;
1222

1223
	sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);
A
Al Viro 已提交
1224

1225
	if (ctx->is_bdev) {
1226
#ifdef CONFIG_BLOCK
1227
		err = -EINVAL;
1228
		if (!sb_set_blocksize(sb, ctx->blksize))
1229
			goto err;
1230
#endif
M
Miklos Szeredi 已提交
1231
	} else {
1232 1233
		sb->s_blocksize = PAGE_SIZE;
		sb->s_blocksize_bits = PAGE_SHIFT;
M
Miklos Szeredi 已提交
1234
	}
1235 1236 1237

	sb->s_subtype = ctx->subtype;
	ctx->subtype = NULL;
M
Miklos Szeredi 已提交
1238 1239
	sb->s_magic = FUSE_SUPER_MAGIC;
	sb->s_op = &fuse_super_operations;
S
Seth Forshee 已提交
1240
	sb->s_xattr = fuse_xattr_handlers;
M
Miklos Szeredi 已提交
1241
	sb->s_maxbytes = MAX_LFS_FILESIZE;
1242
	sb->s_time_gran = 1;
M
Miklos Szeredi 已提交
1243
	sb->s_export_op = &fuse_export_operations;
1244 1245 1246
	sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE;
	if (sb->s_user_ns != &init_user_ns)
		sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;
M
Miklos Szeredi 已提交
1247

1248 1249 1250 1251 1252 1253 1254
	/*
	 * If we are not in the initial user namespace posix
	 * acls must be translated.
	 */
	if (sb->s_user_ns != &init_user_ns)
		sb->s_xattr = fuse_no_acl_xattr_handlers;

1255 1256 1257 1258 1259 1260
	if (IS_ENABLED(CONFIG_FUSE_DAX)) {
		err = fuse_dax_conn_alloc(fc, ctx->dax_dev);
		if (err)
			goto err;
	}

1261 1262 1263 1264
	if (ctx->fudptr) {
		err = -ENOMEM;
		fud = fuse_dev_alloc_install(fc);
		if (!fud)
1265
			goto err_free_dax;
1266
	}
1267

1268
	fc->dev = sb->s_dev;
1269
	fm->sb = sb;
1270 1271
	err = fuse_bdi_init(fc, sb);
	if (err)
1272
		goto err_dev_free;
1273

1274
	/* Handle umasking inside the fuse code */
1275
	if (sb->s_flags & SB_POSIXACL)
1276
		fc->dont_mask = 1;
1277
	sb->s_flags |= SB_POSIXACL;
1278

1279 1280 1281 1282
	fc->default_permissions = ctx->default_permissions;
	fc->allow_other = ctx->allow_other;
	fc->user_id = ctx->user_id;
	fc->group_id = ctx->group_id;
1283
	fc->legacy_opts_show = ctx->legacy_opts_show;
1284
	fc->max_read = max_t(unsigned, 4096, ctx->max_read);
1285
	fc->destroy = ctx->destroy;
1286 1287
	fc->no_control = ctx->no_control;
	fc->no_force_umount = ctx->no_force_umount;
1288

M
Miklos Szeredi 已提交
1289
	err = -ENOMEM;
1290
	root = fuse_get_root_inode(sb, ctx->rootmode);
1291
	sb->s_d_op = &fuse_root_dentry_operations;
1292 1293
	root_dentry = d_make_root(root);
	if (!root_dentry)
1294
		goto err_dev_free;
1295
	/* Root dentry doesn't have .d_revalidate */
A
Al Viro 已提交
1296
	sb->s_d_op = &fuse_dentry_operations;
1297

1298
	mutex_lock(&fuse_mutex);
1299
	err = -EINVAL;
1300
	if (ctx->fudptr && *ctx->fudptr)
1301
		goto err_unlock;
1302

1303 1304 1305 1306 1307
	err = fuse_ctl_add_conn(fc);
	if (err)
		goto err_unlock;

	list_add_tail(&fc->entry, &fuse_conn_list);
1308
	sb->s_root = root_dentry;
1309 1310
	if (ctx->fudptr)
		*ctx->fudptr = fud;
1311
	mutex_unlock(&fuse_mutex);
1312 1313 1314 1315 1316 1317
	return 0;

 err_unlock:
	mutex_unlock(&fuse_mutex);
	dput(root_dentry);
 err_dev_free:
1318 1319
	if (fud)
		fuse_dev_free(fud);
1320 1321 1322
 err_free_dax:
	if (IS_ENABLED(CONFIG_FUSE_DAX))
		fuse_dax_conn_free(fc);
1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333
 err:
	return err;
}
EXPORT_SYMBOL_GPL(fuse_fill_super_common);

static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
{
	struct fuse_fs_context *ctx = fsc->fs_private;
	struct file *file;
	int err;
	struct fuse_conn *fc;
1334
	struct fuse_mount *fm;
1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354

	err = -EINVAL;
	file = fget(ctx->fd);
	if (!file)
		goto err;

	/*
	 * Require mount to happen from the same user namespace which
	 * opened /dev/fuse to prevent potential attacks.
	 */
	if ((file->f_op != &fuse_dev_operations) ||
	    (file->f_cred->user_ns != sb->s_user_ns))
		goto err_fput;
	ctx->fudptr = &file->private_data;

	fc = kmalloc(sizeof(*fc), GFP_KERNEL);
	err = -ENOMEM;
	if (!fc)
		goto err_fput;

1355 1356 1357 1358 1359 1360 1361
	fm = kzalloc(sizeof(*fm), GFP_KERNEL);
	if (!fm) {
		kfree(fc);
		goto err_fput;
	}

	fuse_conn_init(fc, fm, sb->s_user_ns, &fuse_dev_fiq_ops, NULL);
1362
	fc->release = fuse_free_conn;
1363 1364

	sb->s_fs_info = fm;
1365 1366 1367 1368

	err = fuse_fill_super_common(sb, ctx);
	if (err)
		goto err_put_conn;
M
Miklos Szeredi 已提交
1369 1370 1371 1372 1373 1374
	/*
	 * atomic_dec_and_test() in fput() provides the necessary
	 * memory barrier for file->private_data to be visible on all
	 * CPUs after this
	 */
	fput(file);
1375
	fuse_send_init(get_fuse_mount_super(sb));
M
Miklos Szeredi 已提交
1376 1377
	return 0;

1378
 err_put_conn:
1379
	fuse_mount_put(fm);
1380
	sb->s_fs_info = NULL;
1381 1382 1383
 err_fput:
	fput(file);
 err:
M
Miklos Szeredi 已提交
1384 1385 1386
	return err;
}

1387
static int fuse_get_tree(struct fs_context *fc)
M
Miklos Szeredi 已提交
1388
{
1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405
	struct fuse_fs_context *ctx = fc->fs_private;

	if (!ctx->fd_present || !ctx->rootmode_present ||
	    !ctx->user_id_present || !ctx->group_id_present)
		return -EINVAL;

#ifdef CONFIG_BLOCK
	if (ctx->is_bdev)
		return get_tree_bdev(fc, fuse_fill_super);
#endif

	return get_tree_nodev(fc, fuse_fill_super);
}

static const struct fs_context_operations fuse_context_ops = {
	.free		= fuse_free_fc,
	.parse_param	= fuse_parse_param,
1406
	.reconfigure	= fuse_reconfigure,
1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422
	.get_tree	= fuse_get_tree,
};

/*
 * Set up the filesystem mount context.
 */
static int fuse_init_fs_context(struct fs_context *fc)
{
	struct fuse_fs_context *ctx;

	ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	ctx->max_read = ~0;
	ctx->blksize = FUSE_DEFAULT_BLKSIZE;
1423
	ctx->legacy_opts_show = true;
1424 1425

#ifdef CONFIG_BLOCK
1426
	if (fc->fs_type == &fuseblk_fs_type) {
1427
		ctx->is_bdev = true;
1428 1429
		ctx->destroy = true;
	}
1430 1431 1432 1433 1434
#endif

	fc->fs_private = ctx;
	fc->ops = &fuse_context_ops;
	return 0;
M
Miklos Szeredi 已提交
1435 1436
}

1437
bool fuse_mount_remove(struct fuse_mount *fm)
J
John Muir 已提交
1438
{
1439 1440
	struct fuse_conn *fc = fm->fc;
	bool last = false;
J
John Muir 已提交
1441

1442 1443 1444 1445 1446
	down_write(&fc->killsb);
	list_del_init(&fm->fc_entry);
	if (list_empty(&fc->mounts))
		last = true;
	up_write(&fc->killsb);
1447

1448 1449 1450
	return last;
}
EXPORT_SYMBOL_GPL(fuse_mount_remove);
1451

1452 1453 1454 1455 1456 1457 1458 1459 1460
/*
 * Shut down the connection behind @fm: call fuse_send_destroy() when
 * the connection has its destroy flag set, then abort the connection
 * and wait for the abort to finish.
 */
void fuse_conn_destroy(struct fuse_mount *fm)
{
	struct fuse_conn *conn = fm->fc;

	if (conn->destroy)
		fuse_send_destroy(fm);

	fuse_abort_conn(conn);
	fuse_wait_aborted(conn);
}
1462
EXPORT_SYMBOL_GPL(fuse_conn_destroy);
J
John Muir 已提交
1463

1464
/*
 * .kill_sb for "fuse": if the superblock has a fuse mount, remove it
 * and destroy the connection when fuse_mount_remove() reports that it
 * was the last mount, then hand over to kill_anon_super().
 */
static void fuse_kill_sb_anon(struct super_block *sb)
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);

	if (fm && fuse_mount_remove(fm))
		fuse_conn_destroy(fm);

	kill_anon_super(sb);
}

1477 1478 1479
static struct file_system_type fuse_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "fuse",
1480
	.fs_flags	= FS_HAS_SUBTYPE | FS_USERNS_MOUNT,
1481
	.init_fs_context = fuse_init_fs_context,
1482
	.parameters	= fuse_fs_parameters,
J
John Muir 已提交
1483
	.kill_sb	= fuse_kill_sb_anon,
1484
};
1485
MODULE_ALIAS_FS("fuse");
1486 1487

#ifdef CONFIG_BLOCK
J
John Muir 已提交
1488 1489
/*
 * .kill_sb for "fuseblk": if the superblock has a fuse mount, remove
 * it and destroy the connection when fuse_mount_remove() reports that
 * it was the last mount, then hand over to kill_block_super().
 */
static void fuse_kill_sb_blk(struct super_block *sb)
{
	struct fuse_mount *fm = get_fuse_mount_super(sb);

	if (fm && fuse_mount_remove(fm))
		fuse_conn_destroy(fm);

	kill_block_super(sb);
}

1501 1502 1503
static struct file_system_type fuseblk_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "fuseblk",
1504
	.init_fs_context = fuse_init_fs_context,
1505
	.parameters	= fuse_fs_parameters,
J
John Muir 已提交
1506
	.kill_sb	= fuse_kill_sb_blk,
A
Alexey Dobriyan 已提交
1507
	.fs_flags	= FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
1508
};
1509
MODULE_ALIAS_FS("fuseblk");
1510

1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530
/* Register the "fuseblk" type; returns register_filesystem()'s result */
static inline int register_fuseblk(void)
{
	return register_filesystem(&fuseblk_fs_type);
}

/* Unregister the "fuseblk" filesystem type */
static inline void unregister_fuseblk(void)
{
	unregister_filesystem(&fuseblk_fs_type);
}
#else
/* Without CONFIG_BLOCK there is no fuseblk type: report success */
static inline int register_fuseblk(void)
{
	return 0;
}

/* Without CONFIG_BLOCK there is nothing to unregister */
static inline void unregister_fuseblk(void)
{
}
#endif

1531
/*
 * Constructor passed to kmem_cache_create() for the fuse inode cache:
 * treats the object pointer as a struct inode and runs
 * inode_init_once() on it.
 */
static void fuse_inode_init_once(void *foo)
{
	inode_init_once((struct inode *)foo);
}

/*
 * Create the fuse inode slab cache and register the "fuseblk" and
 * "fuse" filesystem types, undoing the earlier steps if a later one
 * fails.  Returns 0 on success or a negative errno.
 */
static int __init fuse_fs_init(void)
{
	int ret;

	fuse_inode_cachep = kmem_cache_create("fuse_inode",
			sizeof(struct fuse_inode), 0,
			SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT|SLAB_RECLAIM_ACCOUNT,
			fuse_inode_init_once);
	if (!fuse_inode_cachep)
		return -ENOMEM;

	ret = register_fuseblk();
	if (ret)
		goto destroy_cache;

	ret = register_filesystem(&fuse_fs_type);
	if (ret)
		goto unregister_blk;

	return 0;

 unregister_blk:
	unregister_fuseblk();
 destroy_cache:
	kmem_cache_destroy(fuse_inode_cachep);
	return ret;
}

static void fuse_fs_cleanup(void)
{
	unregister_filesystem(&fuse_fs_type);
1571
	unregister_fuseblk();
1572 1573 1574 1575 1576 1577

	/*
	 * Make sure all delayed rcu free inodes are flushed before we
	 * destroy cache.
	 */
	rcu_barrier();
M
Miklos Szeredi 已提交
1578 1579 1580
	kmem_cache_destroy(fuse_inode_cachep);
}

1581 1582
static struct kobject *fuse_kobj;

1583 1584 1585 1586
static int fuse_sysfs_init(void)
{
	int err;

1587
	fuse_kobj = kobject_create_and_add("fuse", fs_kobj);
1588 1589
	if (!fuse_kobj) {
		err = -ENOMEM;
1590
		goto out_err;
1591
	}
1592

1593 1594
	err = sysfs_create_mount_point(fuse_kobj, "connections");
	if (err)
1595 1596 1597 1598 1599
		goto out_fuse_unregister;

	return 0;

 out_fuse_unregister:
1600
	kobject_put(fuse_kobj);
1601 1602 1603 1604 1605 1606
 out_err:
	return err;
}

static void fuse_sysfs_cleanup(void)
{
1607
	sysfs_remove_mount_point(fuse_kobj, "connections");
1608
	kobject_put(fuse_kobj);
1609 1610
}

M
Miklos Szeredi 已提交
1611 1612 1613 1614
/*
 * Module entry point.  Initialises the connection list, then the
 * filesystem, device, sysfs and control layers in that order; a
 * failure unwinds the layers already set up in reverse order.  The
 * global request limits are sanitised once everything is in place.
 */
static int __init fuse_init(void)
{
	int err;

	pr_info("init (API version %i.%i)\n",
		FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION);

	INIT_LIST_HEAD(&fuse_conn_list);

	err = fuse_fs_init();
	if (err)
		return err;

	err = fuse_dev_init();
	if (err)
		goto undo_fs;

	err = fuse_sysfs_init();
	if (err)
		goto undo_dev;

	err = fuse_ctl_init();
	if (err)
		goto undo_sysfs;

	sanitize_global_limit(&max_user_bgreq);
	sanitize_global_limit(&max_user_congthresh);

	return 0;

 undo_sysfs:
	fuse_sysfs_cleanup();
 undo_dev:
	fuse_dev_cleanup();
 undo_fs:
	fuse_fs_cleanup();
	return err;
}

/*
 * Module exit point: tear down the control, sysfs, filesystem and
 * device layers, in that order.
 */
static void __exit fuse_exit(void)
{
	pr_debug("exit\n");

	fuse_ctl_cleanup();
	fuse_sysfs_cleanup();

	fuse_fs_cleanup();
	fuse_dev_cleanup();
}

module_init(fuse_init);
module_exit(fuse_exit);