/*
  FUSE: Filesystem in Userspace
  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/

#include "fuse_i.h"

#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/statfs.h>
#include <linux/random.h>
#include <linux/sched.h>
#include <linux/exportfs.h>
#include <linux/posix_acl.h>
#include <linux/pid_namespace.h>

MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
MODULE_DESCRIPTION("Filesystem in Userspace");
MODULE_LICENSE("GPL");

31
static struct kmem_cache *fuse_inode_cachep;
32 33
struct list_head fuse_conn_list;
DEFINE_MUTEX(fuse_mutex);
M
Miklos Szeredi 已提交
34

35
static int set_global_limit(const char *val, const struct kernel_param *kp);
36

37
unsigned max_user_bgreq;
38 39 40 41 42 43 44
module_param_call(max_user_bgreq, set_global_limit, param_get_uint,
		  &max_user_bgreq, 0644);
__MODULE_PARM_TYPE(max_user_bgreq, "uint");
MODULE_PARM_DESC(max_user_bgreq,
 "Global limit for the maximum number of backgrounded requests an "
 "unprivileged user can set");

45
unsigned max_user_congthresh;
46 47 48 49 50 51 52
module_param_call(max_user_congthresh, set_global_limit, param_get_uint,
		  &max_user_congthresh, 0644);
__MODULE_PARM_TYPE(max_user_congthresh, "uint");
MODULE_PARM_DESC(max_user_congthresh,
 "Global limit for the maximum congestion threshold an "
 "unprivileged user can set");

M
Miklos Szeredi 已提交
53 54
#define FUSE_SUPER_MAGIC 0x65735546

M
Miklos Szeredi 已提交
55 56
#define FUSE_DEFAULT_BLKSIZE 512

57 58 59 60 61 62
/** Maximum number of outstanding background requests */
#define FUSE_DEFAULT_MAX_BACKGROUND 12

/** Congestion starts at 75% of maximum */
#define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4)

63 64 65 66
#ifdef CONFIG_BLOCK
static struct file_system_type fuseblk_fs_type;
#endif

67
struct fuse_forget_link *fuse_alloc_forget(void)
68
{
69
	return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL_ACCOUNT);
70 71
}

M
Miklos Szeredi 已提交
72 73 74 75
static struct inode *fuse_alloc_inode(struct super_block *sb)
{
	struct fuse_inode *fi;

Z
zhangliguang 已提交
76 77
	fi = kmem_cache_alloc(fuse_inode_cachep, GFP_KERNEL);
	if (!fi)
M
Miklos Szeredi 已提交
78 79
		return NULL;

M
Miklos Szeredi 已提交
80
	fi->i_time = 0;
81
	fi->inval_mask = 0;
M
Miklos Szeredi 已提交
82
	fi->nodeid = 0;
83
	fi->nlookup = 0;
84
	fi->attr_version = 0;
85
	fi->orig_ino = 0;
86
	fi->state = 0;
87
	mutex_init(&fi->mutex);
88
	init_rwsem(&fi->i_mmap_sem);
89
	spin_lock_init(&fi->lock);
90
	fi->forget = fuse_alloc_forget();
91 92 93 94 95
	if (!fi->forget)
		goto out_free;

	if (IS_ENABLED(CONFIG_FUSE_DAX) && !fuse_dax_inode_alloc(sb, fi))
		goto out_free_forget;
M
Miklos Szeredi 已提交
96

Z
zhangliguang 已提交
97
	return &fi->inode;
98 99 100 101 102 103

out_free_forget:
	kfree(fi->forget);
out_free:
	kmem_cache_free(fuse_inode_cachep, fi);
	return NULL;
M
Miklos Szeredi 已提交
104 105
}

A
Al Viro 已提交
106
static void fuse_free_inode(struct inode *inode)
M
Miklos Szeredi 已提交
107
{
108
	struct fuse_inode *fi = get_fuse_inode(inode);
A
Al Viro 已提交
109

110
	mutex_destroy(&fi->mutex);
111
	kfree(fi->forget);
112 113 114
#ifdef CONFIG_FUSE_DAX
	kfree(fi->dax);
#endif
A
Al Viro 已提交
115
	kmem_cache_free(fuse_inode_cachep, fi);
M
Miklos Szeredi 已提交
116 117
}

118
static void fuse_evict_inode(struct inode *inode)
M
Miklos Szeredi 已提交
119
{
A
Al Viro 已提交
120 121
	struct fuse_inode *fi = get_fuse_inode(inode);

122
	truncate_inode_pages_final(&inode->i_data);
123
	clear_inode(inode);
124
	if (inode->i_sb->s_flags & SB_ACTIVE) {
M
Miklos Szeredi 已提交
125
		struct fuse_conn *fc = get_fuse_conn(inode);
126 127 128

		if (FUSE_IS_DAX(inode))
			fuse_dax_inode_cleanup(inode);
129 130
		fuse_queue_forget(fc, fi->forget, fi->nodeid, fi->nlookup);
		fi->forget = NULL;
131
	}
A
Al Viro 已提交
132 133 134 135
	if (S_ISREG(inode->i_mode) && !is_bad_inode(inode)) {
		WARN_ON(!list_empty(&fi->write_files));
		WARN_ON(!list_empty(&fi->queued_writes));
	}
M
Miklos Szeredi 已提交
136 137
}

138
static int fuse_reconfigure(struct fs_context *fc)
139
{
140 141
	struct super_block *sb = fc->root->d_sb;

142
	sync_filesystem(sb);
143
	if (fc->sb_flags & SB_MANDLOCK)
144 145 146 147 148
		return -EINVAL;

	return 0;
}

149 150 151 152 153 154 155 156 157 158 159 160
/*
 * ino_t is 32-bits on 32-bit arch. We have to squash the 64-bit value down
 * so that it will fit.
 */
static ino_t fuse_squash_ino(u64 ino64)
{
	ino_t ino = (ino_t) ino64;
	if (sizeof(ino_t) < sizeof(u64))
		ino ^= ino64 >> (sizeof(u64) - sizeof(ino_t)) * 8;
	return ino;
}

M
Miklos Szeredi 已提交
161 162
void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
				   u64 attr_valid)
M
Miklos Szeredi 已提交
163
{
M
Miklos Szeredi 已提交
164
	struct fuse_conn *fc = get_fuse_conn(inode);
165
	struct fuse_inode *fi = get_fuse_inode(inode);
M
Miklos Szeredi 已提交
166

167 168
	lockdep_assert_held(&fi->lock);

169
	fi->attr_version = atomic64_inc_return(&fc->attr_version);
170
	fi->i_time = attr_valid;
171
	WRITE_ONCE(fi->inval_mask, 0);
172

173
	inode->i_ino     = fuse_squash_ino(attr->ino);
174
	inode->i_mode    = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
M
Miklos Szeredi 已提交
175
	set_nlink(inode, attr->nlink);
176 177
	inode->i_uid     = make_kuid(fc->user_ns, attr->uid);
	inode->i_gid     = make_kgid(fc->user_ns, attr->gid);
M
Miklos Szeredi 已提交
178 179 180
	inode->i_blocks  = attr->blocks;
	inode->i_atime.tv_sec   = attr->atime;
	inode->i_atime.tv_nsec  = attr->atimensec;
M
Maxim Patlasov 已提交
181 182 183 184
	/* mtime from server may be stale due to local buffered write */
	if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) {
		inode->i_mtime.tv_sec   = attr->mtime;
		inode->i_mtime.tv_nsec  = attr->mtimensec;
M
Maxim Patlasov 已提交
185 186
		inode->i_ctime.tv_sec   = attr->ctime;
		inode->i_ctime.tv_nsec  = attr->ctimensec;
M
Maxim Patlasov 已提交
187
	}
188

189 190 191 192 193
	if (attr->blksize != 0)
		inode->i_blkbits = ilog2(attr->blksize);
	else
		inode->i_blkbits = inode->i_sb->s_blocksize_bits;

194 195 196 197 198 199
	/*
	 * Don't set the sticky bit in i_mode, unless we want the VFS
	 * to check permissions.  This prevents failures due to the
	 * check in may_delete().
	 */
	fi->orig_i_mode = inode->i_mode;
M
Miklos Szeredi 已提交
200
	if (!fc->default_permissions)
201
		inode->i_mode &= ~S_ISVTX;
202 203

	fi->orig_ino = attr->ino;
M
Miklos Szeredi 已提交
204 205 206 207 208 209 210
}

/*
 * Apply attributes from the server, including the size, and invalidate the
 * page cache where needed.
 *
 * @attr_version: version the attributes were fetched at; when non-zero and
 *                older than the inode's current version the update is
 *                dropped.  The update is also dropped while
 *                FUSE_I_SIZE_UNSTABLE is set.
 *
 * Takes and releases fi->lock internally.
 */
void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
			    u64 attr_valid, u64 attr_version)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	bool is_wb = fc->writeback_cache;
	loff_t oldsize;
	struct timespec64 old_mtime;

	spin_lock(&fi->lock);
	if ((attr_version != 0 && fi->attr_version > attr_version) ||
	    test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) {
		spin_unlock(&fi->lock);
		return;
	}

	/* Sample mtime before it is overwritten, for the auto-inval check. */
	old_mtime = inode->i_mtime;
	fuse_change_attributes_common(inode, attr, attr_valid);

	oldsize = inode->i_size;
	/*
	 * In case of writeback_cache enabled, the cached writes beyond EOF
	 * extend local i_size without keeping userspace server in sync. So,
	 * attr->size coming from server can be stale. We cannot trust it.
	 */
	if (!is_wb || !S_ISREG(inode->i_mode))
		i_size_write(inode, attr->size);
	spin_unlock(&fi->lock);

	if (!is_wb && S_ISREG(inode->i_mode)) {
		bool inval = false;

		if (oldsize != attr->size) {
			truncate_pagecache(inode, attr->size);
			if (!fc->explicit_inval_data)
				inval = true;
		} else if (fc->auto_inval_data) {
			struct timespec64 new_mtime = {
				.tv_sec = attr->mtime,
				.tv_nsec = attr->mtimensec,
			};

			/*
			 * Auto inval mode also checks and invalidates if mtime
			 * has changed.
			 */
			if (!timespec64_equal(&old_mtime, &new_mtime))
				inval = true;
		}

		if (inval)
			invalidate_inode_pages2(inode->i_mapping);
	}
}

/*
 * Initialize a freshly created inode from @attr: file type, size, mtime and
 * ctime, then the type-specific setup.
 *
 * Any file type other than regular, directory, symlink, char/block device,
 * FIFO or socket hits BUG().
 */
static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
{
	inode->i_mode = attr->mode & S_IFMT;
	inode->i_size = attr->size;
	inode->i_mtime.tv_sec  = attr->mtime;
	inode->i_mtime.tv_nsec = attr->mtimensec;
	inode->i_ctime.tv_sec  = attr->ctime;
	inode->i_ctime.tv_nsec = attr->ctimensec;
	if (S_ISREG(inode->i_mode)) {
		fuse_init_common(inode);
		fuse_init_file_inode(inode);
	} else if (S_ISDIR(inode->i_mode))
		fuse_init_dir(inode);
	else if (S_ISLNK(inode->i_mode))
		fuse_init_symlink(inode);
	else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
		 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
		fuse_init_common(inode);
		init_special_inode(inode, inode->i_mode,
				   new_decode_dev(attr->rdev));
	} else
		BUG();
}

J
John Muir 已提交
285
int fuse_inode_eq(struct inode *inode, void *_nodeidp)
M
Miklos Szeredi 已提交
286
{
M
Miklos Szeredi 已提交
287
	u64 nodeid = *(u64 *) _nodeidp;
M
Miklos Szeredi 已提交
288 289 290 291 292 293 294 295
	if (get_node_id(inode) == nodeid)
		return 1;
	else
		return 0;
}

static int fuse_inode_set(struct inode *inode, void *_nodeidp)
{
M
Miklos Szeredi 已提交
296
	u64 nodeid = *(u64 *) _nodeidp;
M
Miklos Szeredi 已提交
297 298 299 300
	get_fuse_inode(inode)->nodeid = nodeid;
	return 0;
}

M
Miklos Szeredi 已提交
301
struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
302 303
			int generation, struct fuse_attr *attr,
			u64 attr_valid, u64 attr_version)
M
Miklos Szeredi 已提交
304 305
{
	struct inode *inode;
306
	struct fuse_inode *fi;
M
Miklos Szeredi 已提交
307 308 309 310 311 312 313 314
	struct fuse_conn *fc = get_fuse_conn_super(sb);

 retry:
	inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid);
	if (!inode)
		return NULL;

	if ((inode->i_state & I_NEW)) {
M
Maxim Patlasov 已提交
315
		inode->i_flags |= S_NOATIME;
316
		if (!fc->writeback_cache || !S_ISREG(attr->mode))
M
Maxim Patlasov 已提交
317
			inode->i_flags |= S_NOCMTIME;
M
Miklos Szeredi 已提交
318 319 320 321 322 323 324 325 326 327
		inode->i_generation = generation;
		fuse_init_inode(inode, attr);
		unlock_new_inode(inode);
	} else if ((inode->i_mode ^ attr->mode) & S_IFMT) {
		/* Inode has changed type, any I/O on the old should fail */
		make_bad_inode(inode);
		iput(inode);
		goto retry;
	}

328
	fi = get_fuse_inode(inode);
329
	spin_lock(&fi->lock);
M
Miklos Szeredi 已提交
330
	fi->nlookup++;
331
	spin_unlock(&fi->lock);
332 333
	fuse_change_attributes(inode, attr, attr_valid, attr_version);

M
Miklos Szeredi 已提交
334 335 336
	return inode;
}

J
John Muir 已提交
337 338 339
int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
			     loff_t offset, loff_t len)
{
340 341
	struct fuse_conn *fc = get_fuse_conn_super(sb);
	struct fuse_inode *fi;
J
John Muir 已提交
342 343 344 345 346 347 348 349
	struct inode *inode;
	pgoff_t pg_start;
	pgoff_t pg_end;

	inode = ilookup5(sb, nodeid, fuse_inode_eq, &nodeid);
	if (!inode)
		return -ENOENT;

350 351 352 353 354
	fi = get_fuse_inode(inode);
	spin_lock(&fi->lock);
	fi->attr_version = atomic64_inc_return(&fc->attr_version);
	spin_unlock(&fi->lock);

J
John Muir 已提交
355
	fuse_invalidate_attr(inode);
S
Seth Forshee 已提交
356
	forget_all_cached_acls(inode);
J
John Muir 已提交
357
	if (offset >= 0) {
358
		pg_start = offset >> PAGE_SHIFT;
J
John Muir 已提交
359 360 361
		if (len <= 0)
			pg_end = -1;
		else
362
			pg_end = (offset + len - 1) >> PAGE_SHIFT;
J
John Muir 已提交
363 364 365 366 367 368 369
		invalidate_inode_pages2_range(inode->i_mapping,
					      pg_start, pg_end);
	}
	iput(inode);
	return 0;
}

370
bool fuse_lock_inode(struct inode *inode)
371
{
372 373 374
	bool locked = false;

	if (!get_fuse_conn(inode)->parallel_dirops) {
375
		mutex_lock(&get_fuse_inode(inode)->mutex);
376 377 378 379
		locked = true;
	}

	return locked;
380 381
}

382
void fuse_unlock_inode(struct inode *inode, bool locked)
383
{
384
	if (locked)
385 386 387
		mutex_unlock(&get_fuse_inode(inode)->mutex);
}

388
static void fuse_umount_begin(struct super_block *sb)
389
{
390 391 392 393
	struct fuse_conn *fc = get_fuse_conn_super(sb);

	if (!fc->no_force_umount)
		fuse_abort_conn(fc);
394 395
}

396 397
static void fuse_send_destroy(struct fuse_conn *fc)
{
398 399 400 401 402 403 404
	if (fc->conn_init) {
		FUSE_ARGS(args);

		args.opcode = FUSE_DESTROY;
		args.force = true;
		args.nocreds = true;
		fuse_simple_request(fc, &args);
405 406 407
	}
}

408 409 410 411
static void fuse_put_super(struct super_block *sb)
{
	struct fuse_conn *fc = get_fuse_conn_super(sb);

M
Miklos Szeredi 已提交
412 413 414 415 416
	mutex_lock(&fuse_mutex);
	list_del(&fc->entry);
	fuse_ctl_remove_conn(fc);
	mutex_unlock(&fuse_mutex);

417
	fuse_conn_put(fc);
M
Miklos Szeredi 已提交
418 419
}

420 421 422 423
static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr)
{
	stbuf->f_type    = FUSE_SUPER_MAGIC;
	stbuf->f_bsize   = attr->bsize;
424
	stbuf->f_frsize  = attr->frsize;
425 426 427 428 429 430 431 432 433
	stbuf->f_blocks  = attr->blocks;
	stbuf->f_bfree   = attr->bfree;
	stbuf->f_bavail  = attr->bavail;
	stbuf->f_files   = attr->files;
	stbuf->f_ffree   = attr->ffree;
	stbuf->f_namelen = attr->namelen;
	/* fsid is left zero */
}

434
static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
435
{
436
	struct super_block *sb = dentry->d_sb;
437
	struct fuse_conn *fc = get_fuse_conn_super(sb);
438
	FUSE_ARGS(args);
439 440 441
	struct fuse_statfs_out outarg;
	int err;

442
	if (!fuse_allow_current_process(fc)) {
M
Miklos Szeredi 已提交
443 444 445 446
		buf->f_type = FUSE_SUPER_MAGIC;
		return 0;
	}

447
	memset(&outarg, 0, sizeof(outarg));
448 449 450 451 452 453
	args.in_numargs = 0;
	args.opcode = FUSE_STATFS;
	args.nodeid = get_node_id(d_inode(dentry));
	args.out_numargs = 1;
	args.out_args[0].size = sizeof(outarg);
	args.out_args[0].value = &outarg;
454
	err = fuse_simple_request(fc, &args);
455 456 457 458 459
	if (!err)
		convert_fuse_statfs(buf, &outarg.st);
	return err;
}

/* Mount option identifiers; matched to names in fuse_fs_parameters[]. */
enum {
	OPT_SOURCE,
	OPT_SUBTYPE,
	OPT_FD,
	OPT_ROOTMODE,
	OPT_USER_ID,
	OPT_GROUP_ID,
	OPT_DEFAULT_PERMISSIONS,
	OPT_ALLOW_OTHER,
	OPT_MAX_READ,
	OPT_BLKSIZE,
	OPT_ERR
};

474
static const struct fs_parameter_spec fuse_fs_parameters[] = {
475 476 477 478 479 480 481 482 483
	fsparam_string	("source",		OPT_SOURCE),
	fsparam_u32	("fd",			OPT_FD),
	fsparam_u32oct	("rootmode",		OPT_ROOTMODE),
	fsparam_u32	("user_id",		OPT_USER_ID),
	fsparam_u32	("group_id",		OPT_GROUP_ID),
	fsparam_flag	("default_permissions",	OPT_DEFAULT_PERMISSIONS),
	fsparam_flag	("allow_other",		OPT_ALLOW_OTHER),
	fsparam_u32	("max_read",		OPT_MAX_READ),
	fsparam_u32	("blksize",		OPT_BLKSIZE),
484
	fsparam_string	("subtype",		OPT_SUBTYPE),
485 486 487 488
	{}
};

static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param)
489
{
490 491 492 493
	struct fs_parse_result result;
	struct fuse_fs_context *ctx = fc->fs_private;
	int opt;

494 495 496 497 498 499 500 501 502 503
	if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
		/*
		 * Ignore options coming from mount(MS_REMOUNT) for backward
		 * compatibility.
		 */
		if (fc->oldapi)
			return 0;

		return invalfc(fc, "No changes allowed in reconfigure");
	}
504

505
	opt = fs_parse(fc, fuse_fs_parameters, param, &result);
506 507 508 509 510 511
	if (opt < 0)
		return opt;

	switch (opt) {
	case OPT_SOURCE:
		if (fc->source)
A
Al Viro 已提交
512
			return invalfc(fc, "Multiple sources specified");
513 514 515 516 517 518
		fc->source = param->string;
		param->string = NULL;
		break;

	case OPT_SUBTYPE:
		if (ctx->subtype)
A
Al Viro 已提交
519
			return invalfc(fc, "Multiple subtypes specified");
520 521 522 523 524 525
		ctx->subtype = param->string;
		param->string = NULL;
		return 0;

	case OPT_FD:
		ctx->fd = result.uint_32;
526
		ctx->fd_present = true;
527 528 529 530
		break;

	case OPT_ROOTMODE:
		if (!fuse_valid_type(result.uint_32))
A
Al Viro 已提交
531
			return invalfc(fc, "Invalid rootmode");
532
		ctx->rootmode = result.uint_32;
533
		ctx->rootmode_present = true;
534 535 536 537 538
		break;

	case OPT_USER_ID:
		ctx->user_id = make_kuid(fc->user_ns, result.uint_32);
		if (!uid_valid(ctx->user_id))
A
Al Viro 已提交
539
			return invalfc(fc, "Invalid user_id");
540
		ctx->user_id_present = true;
541 542 543 544 545
		break;

	case OPT_GROUP_ID:
		ctx->group_id = make_kgid(fc->user_ns, result.uint_32);
		if (!gid_valid(ctx->group_id))
A
Al Viro 已提交
546
			return invalfc(fc, "Invalid group_id");
547
		ctx->group_id_present = true;
548 549 550
		break;

	case OPT_DEFAULT_PERMISSIONS:
551
		ctx->default_permissions = true;
552 553 554
		break;

	case OPT_ALLOW_OTHER:
555
		ctx->allow_other = true;
556 557 558 559 560 561 562 563
		break;

	case OPT_MAX_READ:
		ctx->max_read = result.uint_32;
		break;

	case OPT_BLKSIZE:
		if (!ctx->is_bdev)
A
Al Viro 已提交
564
			return invalfc(fc, "blksize only supported for fuseblk");
565 566 567 568 569
		ctx->blksize = result.uint_32;
		break;

	default:
		return -EINVAL;
570
	}
571 572

	return 0;
573 574
}

575
static void fuse_free_fc(struct fs_context *fc)
M
Miklos Szeredi 已提交
576
{
577
	struct fuse_fs_context *ctx = fc->fs_private;
578

579 580 581 582
	if (ctx) {
		kfree(ctx->subtype);
		kfree(ctx);
	}
M
Miklos Szeredi 已提交
583 584
}

585
static int fuse_show_options(struct seq_file *m, struct dentry *root)
M
Miklos Szeredi 已提交
586
{
587 588
	struct super_block *sb = root->d_sb;
	struct fuse_conn *fc = get_fuse_conn_super(sb);
M
Miklos Szeredi 已提交
589

590 591 592 593 594 595 596 597 598 599 600 601 602 603
	if (fc->legacy_opts_show) {
		seq_printf(m, ",user_id=%u",
			   from_kuid_munged(fc->user_ns, fc->user_id));
		seq_printf(m, ",group_id=%u",
			   from_kgid_munged(fc->user_ns, fc->group_id));
		if (fc->default_permissions)
			seq_puts(m, ",default_permissions");
		if (fc->allow_other)
			seq_puts(m, ",allow_other");
		if (fc->max_read != ~0)
			seq_printf(m, ",max_read=%u", fc->max_read);
		if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE)
			seq_printf(m, ",blksize=%lu", sb->s_blocksize);
	}
604 605 606 607 608
#ifdef CONFIG_FUSE_DAX
	if (fc->dax)
		seq_puts(m, ",dax");
#endif

M
Miklos Szeredi 已提交
609 610 611
	return 0;
}

612 613 614
static void fuse_iqueue_init(struct fuse_iqueue *fiq,
			     const struct fuse_iqueue_ops *ops,
			     void *priv)
M
Miklos Szeredi 已提交
615 616
{
	memset(fiq, 0, sizeof(struct fuse_iqueue));
617
	spin_lock_init(&fiq->lock);
M
Miklos Szeredi 已提交
618 619 620 621
	init_waitqueue_head(&fiq->waitq);
	INIT_LIST_HEAD(&fiq->pending);
	INIT_LIST_HEAD(&fiq->interrupts);
	fiq->forget_list_tail = &fiq->forget_list_head;
622
	fiq->connected = 1;
623 624
	fiq->ops = ops;
	fiq->priv = priv;
M
Miklos Szeredi 已提交
625 626
}

627 628
static void fuse_pqueue_init(struct fuse_pqueue *fpq)
{
629 630
	unsigned int i;

M
Miklos Szeredi 已提交
631
	spin_lock_init(&fpq->lock);
632 633
	for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
		INIT_LIST_HEAD(&fpq->processing[i]);
634
	INIT_LIST_HEAD(&fpq->io);
635
	fpq->connected = 1;
636 637
}

638 639
void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns,
		    const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv)
M
Miklos Szeredi 已提交
640
{
641 642
	memset(fc, 0, sizeof(*fc));
	spin_lock_init(&fc->lock);
K
Kirill Tkhai 已提交
643
	spin_lock_init(&fc->bg_lock);
J
John Muir 已提交
644
	init_rwsem(&fc->killsb);
645
	refcount_set(&fc->count, 1);
646
	atomic_set(&fc->dev_count, 1);
647
	init_waitqueue_head(&fc->blocked_waitq);
648
	fuse_iqueue_init(&fc->iq, fiq_ops, fiq_priv);
649 650
	INIT_LIST_HEAD(&fc->bg_queue);
	INIT_LIST_HEAD(&fc->entry);
651
	INIT_LIST_HEAD(&fc->devices);
652
	atomic_set(&fc->num_waiting, 0);
653 654
	fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND;
	fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD;
M
Miklos Szeredi 已提交
655
	atomic64_set(&fc->khctr, 0);
656
	fc->polled_files = RB_ROOT;
657
	fc->blocked = 0;
M
Maxim Patlasov 已提交
658
	fc->initialized = 0;
659
	fc->connected = 1;
660
	atomic64_set(&fc->attr_version, 1);
661
	get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
662
	fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
663
	fc->user_ns = get_user_ns(user_ns);
M
Miklos Szeredi 已提交
664
	fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
M
Miklos Szeredi 已提交
665
}
666
EXPORT_SYMBOL_GPL(fuse_conn_init);
M
Miklos Szeredi 已提交
667

668 669
void fuse_conn_put(struct fuse_conn *fc)
{
670
	if (refcount_dec_and_test(&fc->count)) {
671 672
		struct fuse_iqueue *fiq = &fc->iq;

673 674
		if (IS_ENABLED(CONFIG_FUSE_DAX))
			fuse_dax_conn_free(fc);
675 676
		if (fiq->ops->release)
			fiq->ops->release(fiq);
677
		put_pid_ns(fc->pid_ns);
678
		put_user_ns(fc->user_ns);
T
Tejun Heo 已提交
679
		fc->release(fc);
680
	}
681
}
682
EXPORT_SYMBOL_GPL(fuse_conn_put);
683 684 685

struct fuse_conn *fuse_conn_get(struct fuse_conn *fc)
{
686
	refcount_inc(&fc->count);
687 688
	return fc;
}
689
EXPORT_SYMBOL_GPL(fuse_conn_get);
690

691
static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode)
M
Miklos Szeredi 已提交
692 693 694 695 696 697
{
	struct fuse_attr attr;
	memset(&attr, 0, sizeof(attr));

	attr.mode = mode;
	attr.ino = FUSE_ROOT_ID;
698
	attr.nlink = 1;
699
	return fuse_iget(sb, 1, 0, &attr, 0, 0);
M
Miklos Szeredi 已提交
700 701
}

M
Miklos Szeredi 已提交
702
struct fuse_inode_handle {
M
Miklos Szeredi 已提交
703 704 705 706 707 708 709
	u64 nodeid;
	u32 generation;
};

static struct dentry *fuse_get_dentry(struct super_block *sb,
				      struct fuse_inode_handle *handle)
{
710
	struct fuse_conn *fc = get_fuse_conn_super(sb);
M
Miklos Szeredi 已提交
711 712 713 714 715 716 717 718
	struct inode *inode;
	struct dentry *entry;
	int err = -ESTALE;

	if (handle->nodeid == 0)
		goto out_err;

	inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid);
719 720
	if (!inode) {
		struct fuse_entry_out outarg;
A
Al Viro 已提交
721
		const struct qstr name = QSTR_INIT(".", 1);
722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737

		if (!fc->export_support)
			goto out_err;

		err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg,
				       &inode);
		if (err && err != -ENOENT)
			goto out_err;
		if (err || !inode) {
			err = -ESTALE;
			goto out_err;
		}
		err = -EIO;
		if (get_node_id(inode) != handle->nodeid)
			goto out_iput;
	}
M
Miklos Szeredi 已提交
738 739 740 741
	err = -ESTALE;
	if (inode->i_generation != handle->generation)
		goto out_iput;

742
	entry = d_obtain_alias(inode);
A
Al Viro 已提交
743
	if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID)
M
Miklos Szeredi 已提交
744 745 746 747 748 749 750 751 752 753
		fuse_invalidate_entry_cache(entry);

	return entry;

 out_iput:
	iput(inode);
 out_err:
	return ERR_PTR(err);
}

A
Al Viro 已提交
754 755
static int fuse_encode_fh(struct inode *inode, u32 *fh, int *max_len,
			   struct inode *parent)
M
Miklos Szeredi 已提交
756
{
A
Al Viro 已提交
757
	int len = parent ? 6 : 3;
M
Miklos Szeredi 已提交
758 759 760
	u64 nodeid;
	u32 generation;

761 762
	if (*max_len < len) {
		*max_len = len;
763
		return  FILEID_INVALID;
764
	}
M
Miklos Szeredi 已提交
765 766 767 768 769 770 771 772

	nodeid = get_fuse_inode(inode)->nodeid;
	generation = inode->i_generation;

	fh[0] = (u32)(nodeid >> 32);
	fh[1] = (u32)(nodeid & 0xffffffff);
	fh[2] = generation;

A
Al Viro 已提交
773
	if (parent) {
M
Miklos Szeredi 已提交
774 775 776 777 778 779 780 781 782
		nodeid = get_fuse_inode(parent)->nodeid;
		generation = parent->i_generation;

		fh[3] = (u32)(nodeid >> 32);
		fh[4] = (u32)(nodeid & 0xffffffff);
		fh[5] = generation;
	}

	*max_len = len;
A
Al Viro 已提交
783
	return parent ? 0x82 : 0x81;
M
Miklos Szeredi 已提交
784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813
}

static struct dentry *fuse_fh_to_dentry(struct super_block *sb,
		struct fid *fid, int fh_len, int fh_type)
{
	struct fuse_inode_handle handle;

	if ((fh_type != 0x81 && fh_type != 0x82) || fh_len < 3)
		return NULL;

	handle.nodeid = (u64) fid->raw[0] << 32;
	handle.nodeid |= (u64) fid->raw[1];
	handle.generation = fid->raw[2];
	return fuse_get_dentry(sb, &handle);
}

static struct dentry *fuse_fh_to_parent(struct super_block *sb,
		struct fid *fid, int fh_len, int fh_type)
{
	struct fuse_inode_handle parent;

	if (fh_type != 0x82 || fh_len < 6)
		return NULL;

	parent.nodeid = (u64) fid->raw[3] << 32;
	parent.nodeid |= (u64) fid->raw[4];
	parent.generation = fid->raw[5];
	return fuse_get_dentry(sb, &parent);
}

814 815
static struct dentry *fuse_get_parent(struct dentry *child)
{
816
	struct inode *child_inode = d_inode(child);
817 818 819 820
	struct fuse_conn *fc = get_fuse_conn(child_inode);
	struct inode *inode;
	struct dentry *parent;
	struct fuse_entry_out outarg;
A
Al Viro 已提交
821
	const struct qstr name = QSTR_INIT("..", 2);
822 823 824 825 826 827 828
	int err;

	if (!fc->export_support)
		return ERR_PTR(-ESTALE);

	err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode),
			       &name, &outarg, &inode);
829 830 831
	if (err) {
		if (err == -ENOENT)
			return ERR_PTR(-ESTALE);
832 833
		return ERR_PTR(err);
	}
834 835

	parent = d_obtain_alias(inode);
A
Al Viro 已提交
836
	if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID)
837 838 839 840
		fuse_invalidate_entry_cache(parent);

	return parent;
}
M
Miklos Szeredi 已提交
841 842 843 844 845

static const struct export_operations fuse_export_operations = {
	.fh_to_dentry	= fuse_fh_to_dentry,
	.fh_to_parent	= fuse_fh_to_parent,
	.encode_fh	= fuse_encode_fh,
846
	.get_parent	= fuse_get_parent,
M
Miklos Szeredi 已提交
847 848
};

849
static const struct super_operations fuse_super_operations = {
M
Miklos Szeredi 已提交
850
	.alloc_inode    = fuse_alloc_inode,
A
Al Viro 已提交
851
	.free_inode     = fuse_free_inode,
852
	.evict_inode	= fuse_evict_inode,
M
Miklos Szeredi 已提交
853
	.write_inode	= fuse_write_inode,
M
Miklos Szeredi 已提交
854
	.drop_inode	= generic_delete_inode,
M
Miklos Szeredi 已提交
855
	.put_super	= fuse_put_super,
856
	.umount_begin	= fuse_umount_begin,
857
	.statfs		= fuse_statfs,
M
Miklos Szeredi 已提交
858 859 860
	.show_options	= fuse_show_options,
};

861 862
static void sanitize_global_limit(unsigned *limit)
{
M
Miklos Szeredi 已提交
863 864 865 866
	/*
	 * The default maximum number of async requests is calculated to consume
	 * 1/2^13 of the total memory, assuming 392 bytes per request.
	 */
867
	if (*limit == 0)
M
Miklos Szeredi 已提交
868
		*limit = ((totalram_pages() << PAGE_SHIFT) >> 13) / 392;
869 870 871 872 873

	if (*limit >= 1 << 16)
		*limit = (1 << 16) - 1;
}

874
static int set_global_limit(const char *val, const struct kernel_param *kp)
875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896
{
	int rv;

	rv = param_set_uint(val, kp);
	if (rv)
		return rv;

	sanitize_global_limit((unsigned *)kp->arg);

	return 0;
}

static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg)
{
	int cap_sys_admin = capable(CAP_SYS_ADMIN);

	if (arg->minor < 13)
		return;

	sanitize_global_limit(&max_user_bgreq);
	sanitize_global_limit(&max_user_congthresh);

K
Kirill Tkhai 已提交
897
	spin_lock(&fc->bg_lock);
898 899 900 901 902 903 904 905 906 907 908 909 910
	if (arg->max_background) {
		fc->max_background = arg->max_background;

		if (!cap_sys_admin && fc->max_background > max_user_bgreq)
			fc->max_background = max_user_bgreq;
	}
	if (arg->congestion_threshold) {
		fc->congestion_threshold = arg->congestion_threshold;

		if (!cap_sys_admin &&
		    fc->congestion_threshold > max_user_congthresh)
			fc->congestion_threshold = max_user_congthresh;
	}
K
Kirill Tkhai 已提交
911
	spin_unlock(&fc->bg_lock);
912 913
}

914 915 916 917 918 919 920 921
struct fuse_init_args {
	struct fuse_args args;
	struct fuse_init_in in;
	struct fuse_init_out out;
};

static void process_init_reply(struct fuse_conn *fc, struct fuse_args *args,
			       int error)
922
{
923 924
	struct fuse_init_args *ia = container_of(args, typeof(*ia), args);
	struct fuse_init_out *arg = &ia->out;
925
	bool ok = true;
926

927
	if (error || arg->major != FUSE_KERNEL_VERSION)
928
		ok = false;
929
	else {
930 931
		unsigned long ra_pages;

932 933
		process_init_limits(fc, arg);

934
		if (arg->minor >= 6) {
935
			ra_pages = arg->max_readahead / PAGE_SIZE;
936 937
			if (arg->flags & FUSE_ASYNC_READ)
				fc->async_read = 1;
938 939
			if (!(arg->flags & FUSE_POSIX_LOCKS))
				fc->no_lock = 1;
M
Miklos Szeredi 已提交
940 941 942
			if (arg->minor >= 17) {
				if (!(arg->flags & FUSE_FLOCK_LOCKS))
					fc->no_flock = 1;
M
Miklos Szeredi 已提交
943 944 945
			} else {
				if (!(arg->flags & FUSE_POSIX_LOCKS))
					fc->no_flock = 1;
M
Miklos Szeredi 已提交
946
			}
947 948
			if (arg->flags & FUSE_ATOMIC_O_TRUNC)
				fc->atomic_o_trunc = 1;
949 950 951 952 953
			if (arg->minor >= 9) {
				/* LOOKUP has dependency on proto version */
				if (arg->flags & FUSE_EXPORT_SUPPORT)
					fc->export_support = 1;
			}
954 955
			if (arg->flags & FUSE_BIG_WRITES)
				fc->big_writes = 1;
956 957
			if (arg->flags & FUSE_DONT_MASK)
				fc->dont_mask = 1;
958 959
			if (arg->flags & FUSE_AUTO_INVAL_DATA)
				fc->auto_inval_data = 1;
960 961
			else if (arg->flags & FUSE_EXPLICIT_INVAL_DATA)
				fc->explicit_inval_data = 1;
962
			if (arg->flags & FUSE_DO_READDIRPLUS) {
963
				fc->do_readdirplus = 1;
964 965 966
				if (arg->flags & FUSE_READDIRPLUS_AUTO)
					fc->readdirplus_auto = 1;
			}
967 968
			if (arg->flags & FUSE_ASYNC_DIO)
				fc->async_dio = 1;
P
Pavel Emelyanov 已提交
969 970
			if (arg->flags & FUSE_WRITEBACK_CACHE)
				fc->writeback_cache = 1;
971 972
			if (arg->flags & FUSE_PARALLEL_DIROPS)
				fc->parallel_dirops = 1;
973 974
			if (arg->flags & FUSE_HANDLE_KILLPRIV)
				fc->handle_killpriv = 1;
975 976
			if (arg->time_gran && arg->time_gran <= 1000000000)
				fc->sb->s_time_gran = arg->time_gran;
S
Seth Forshee 已提交
977
			if ((arg->flags & FUSE_POSIX_ACL)) {
M
Miklos Szeredi 已提交
978
				fc->default_permissions = 1;
S
Seth Forshee 已提交
979 980 981
				fc->posix_acl = 1;
				fc->sb->s_xattr = fuse_acl_xattr_handlers;
			}
D
Dan Schatzberg 已提交
982 983
			if (arg->flags & FUSE_CACHE_SYMLINKS)
				fc->cache_symlinks = 1;
984 985
			if (arg->flags & FUSE_ABORT_ERROR)
				fc->abort_err = 1;
986 987 988 989 990
			if (arg->flags & FUSE_MAX_PAGES) {
				fc->max_pages =
					min_t(unsigned int, FUSE_MAX_MAX_PAGES,
					max_t(unsigned int, arg->max_pages, 1));
			}
991 992 993 994 995
			if (IS_ENABLED(CONFIG_FUSE_DAX) &&
			    arg->flags & FUSE_MAP_ALIGNMENT &&
			    !fuse_dax_check_alignment(fc, arg->map_alignment)) {
				ok = false;
			}
996
		} else {
997
			ra_pages = fc->max_read / PAGE_SIZE;
998
			fc->no_lock = 1;
M
Miklos Szeredi 已提交
999
			fc->no_flock = 1;
1000
		}
1001

1002 1003
		fc->sb->s_bdi->ra_pages =
				min(fc->sb->s_bdi->ra_pages, ra_pages);
1004 1005
		fc->minor = arg->minor;
		fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
1006
		fc->max_write = max_t(unsigned, 4096, fc->max_write);
1007
		fc->conn_init = 1;
1008
	}
1009 1010
	kfree(ia);

1011 1012 1013 1014 1015
	if (!ok) {
		fc->conn_init = 0;
		fc->conn_error = 1;
	}

1016
	fuse_set_initialized(fc);
1017
	wake_up_all(&fc->blocked_waitq);
1018 1019
}

1020
void fuse_send_init(struct fuse_conn *fc)
1021
{
1022
	struct fuse_init_args *ia;
M
Miklos Szeredi 已提交
1023

1024 1025 1026 1027 1028 1029 1030
	ia = kzalloc(sizeof(*ia), GFP_KERNEL | __GFP_NOFAIL);

	ia->in.major = FUSE_KERNEL_VERSION;
	ia->in.minor = FUSE_KERNEL_MINOR_VERSION;
	ia->in.max_readahead = fc->sb->s_bdi->ra_pages * PAGE_SIZE;
	ia->in.flags |=
		FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
M
Miklos Szeredi 已提交
1031
		FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
M
Miklos Szeredi 已提交
1032
		FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
1033
		FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
P
Pavel Emelyanov 已提交
1034
		FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
1035
		FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
1036
		FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |
1037
		FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS |
1038
		FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA;
1039 1040 1041 1042
#ifdef CONFIG_FUSE_DAX
	if (fc->dax)
		ia->in.flags |= FUSE_MAP_ALIGNMENT;
#endif
1043 1044 1045 1046 1047
	ia->args.opcode = FUSE_INIT;
	ia->args.in_numargs = 1;
	ia->args.in_args[0].size = sizeof(ia->in);
	ia->args.in_args[0].value = &ia->in;
	ia->args.out_numargs = 1;
D
Daniel Mack 已提交
1048
	/* Variable length argument used for backward compatibility
1049 1050
	   with interface version < 7.5.  Rest of init_out is zeroed
	   by do_get_request(), so a short reply is not a problem */
1051
	ia->args.out_argvar = true;
1052 1053 1054 1055 1056 1057 1058 1059
	ia->args.out_args[0].size = sizeof(ia->out);
	ia->args.out_args[0].value = &ia->out;
	ia->args.force = true;
	ia->args.nocreds = true;
	ia->args.end = process_init_reply;

	if (fuse_simple_background(fc, &ia->args, GFP_KERNEL) != 0)
		process_init_reply(fc, &ia->args, -ENOTCONN);
1060
}
1061
EXPORT_SYMBOL_GPL(fuse_send_init);
1062

1063
void fuse_free_conn(struct fuse_conn *fc)
T
Tejun Heo 已提交
1064
{
1065
	WARN_ON(!list_empty(&fc->devices));
A
Al Viro 已提交
1066
	kfree_rcu(fc, rcu);
T
Tejun Heo 已提交
1067
}
1068
EXPORT_SYMBOL_GPL(fuse_free_conn);
T
Tejun Heo 已提交
1069

1070 1071 1072
static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
{
	int err;
1073
	char *suffix = "";
1074

1075
	if (sb->s_bdev) {
1076
		suffix = "-fuseblk";
1077 1078 1079 1080 1081 1082 1083
		/*
		 * sb->s_bdi points to blkdev's bdi however we want to redirect
		 * it to our private bdi...
		 */
		bdi_put(sb->s_bdi);
		sb->s_bdi = &noop_backing_dev_info;
	}
1084 1085
	err = super_setup_bdi_name(sb, "%u:%u%s", MAJOR(fc->dev),
				   MINOR(fc->dev), suffix);
1086 1087 1088
	if (err)
		return err;

1089
	sb->s_bdi->ra_pages = VM_READAHEAD_PAGES;
1090 1091
	/* fuse does it's own writeback accounting */
	sb->s_bdi->capabilities = BDI_CAP_NO_ACCT_WB | BDI_CAP_STRICTLIMIT;
1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104

	/*
	 * For a single fuse filesystem use max 1% of dirty +
	 * writeback threshold.
	 *
	 * This gives about 1M of write buffer for memory maps on a
	 * machine with 1G and 10% dirty_ratio, which should be more
	 * than enough.
	 *
	 * Privileged users can raise it by writing to
	 *
	 *    /sys/class/bdi/<bdi>/max_ratio
	 */
1105
	bdi_set_max_ratio(sb->s_bdi, 1);
1106 1107 1108 1109

	return 0;
}

1110
struct fuse_dev *fuse_dev_alloc(void)
1111 1112
{
	struct fuse_dev *fud;
1113
	struct list_head *pq;
1114 1115

	fud = kzalloc(sizeof(struct fuse_dev), GFP_KERNEL);
1116 1117
	if (!fud)
		return NULL;
1118

1119 1120 1121 1122
	pq = kcalloc(FUSE_PQ_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL);
	if (!pq) {
		kfree(fud);
		return NULL;
1123 1124
	}

1125 1126 1127
	fud->pq.processing = pq;
	fuse_pqueue_init(&fud->pq);

1128 1129 1130 1131 1132 1133 1134
	return fud;
}
EXPORT_SYMBOL_GPL(fuse_dev_alloc);

/*
 * Attach @fud to connection @fc: take a reference on @fc, store it in
 * fud->fc and link the device onto fc->devices under fc->lock.
 */
void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc)
{
	fud->fc = fuse_conn_get(fc);
	spin_lock(&fc->lock);
	list_add_tail(&fud->entry, &fc->devices);
	spin_unlock(&fc->lock);
}
EXPORT_SYMBOL_GPL(fuse_dev_install);

/*
 * Convenience wrapper: fuse_dev_alloc() followed by fuse_dev_install().
 *
 * Returns the installed device, or NULL if allocation fails.
 */
struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc)
{
	struct fuse_dev *fud;

	fud = fuse_dev_alloc();
	if (!fud)
		return NULL;

	fuse_dev_install(fud, fc);
	return fud;
}
EXPORT_SYMBOL_GPL(fuse_dev_alloc_install);

/*
 * Release a fuse_dev.
 *
 * If the device was installed on a connection (fud->fc is set), unlink it
 * from fc->devices under fc->lock and drop the connection reference.  The
 * processing-queue table and the device itself are then freed.
 */
void fuse_dev_free(struct fuse_dev *fud)
{
	struct fuse_conn *fc = fud->fc;

	if (fc) {
		spin_lock(&fc->lock);
		list_del(&fud->entry);
		spin_unlock(&fc->lock);

		fuse_conn_put(fc);
	}
	kfree(fud->pq.processing);
	kfree(fud);
}
EXPORT_SYMBOL_GPL(fuse_dev_free);

int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
M
Miklos Szeredi 已提交
1171
{
1172
	struct fuse_dev *fud = NULL;
1173
	struct fuse_conn *fc = get_fuse_conn_super(sb);
M
Miklos Szeredi 已提交
1174
	struct inode *root;
1175
	struct dentry *root_dentry;
M
Miklos Szeredi 已提交
1176 1177
	int err;

1178
	err = -EINVAL;
1179
	if (sb->s_flags & SB_MANDLOCK)
1180
		goto err;
1181

1182
	sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);
A
Al Viro 已提交
1183

1184
	if (ctx->is_bdev) {
1185
#ifdef CONFIG_BLOCK
1186
		err = -EINVAL;
1187
		if (!sb_set_blocksize(sb, ctx->blksize))
1188
			goto err;
1189
#endif
M
Miklos Szeredi 已提交
1190
	} else {
1191 1192
		sb->s_blocksize = PAGE_SIZE;
		sb->s_blocksize_bits = PAGE_SHIFT;
M
Miklos Szeredi 已提交
1193
	}
1194 1195 1196

	sb->s_subtype = ctx->subtype;
	ctx->subtype = NULL;
M
Miklos Szeredi 已提交
1197 1198
	sb->s_magic = FUSE_SUPER_MAGIC;
	sb->s_op = &fuse_super_operations;
S
Seth Forshee 已提交
1199
	sb->s_xattr = fuse_xattr_handlers;
M
Miklos Szeredi 已提交
1200
	sb->s_maxbytes = MAX_LFS_FILESIZE;
1201
	sb->s_time_gran = 1;
M
Miklos Szeredi 已提交
1202
	sb->s_export_op = &fuse_export_operations;
1203 1204 1205
	sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE;
	if (sb->s_user_ns != &init_user_ns)
		sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;
M
Miklos Szeredi 已提交
1206

1207 1208 1209 1210 1211 1212 1213
	/*
	 * If we are not in the initial user namespace posix
	 * acls must be translated.
	 */
	if (sb->s_user_ns != &init_user_ns)
		sb->s_xattr = fuse_no_acl_xattr_handlers;

1214 1215 1216 1217 1218 1219
	if (IS_ENABLED(CONFIG_FUSE_DAX)) {
		err = fuse_dax_conn_alloc(fc, ctx->dax_dev);
		if (err)
			goto err;
	}

1220 1221 1222 1223
	if (ctx->fudptr) {
		err = -ENOMEM;
		fud = fuse_dev_alloc_install(fc);
		if (!fud)
1224
			goto err_free_dax;
1225
	}
1226

1227
	fc->dev = sb->s_dev;
J
John Muir 已提交
1228
	fc->sb = sb;
1229 1230
	err = fuse_bdi_init(fc, sb);
	if (err)
1231
		goto err_dev_free;
1232

1233
	/* Handle umasking inside the fuse code */
1234
	if (sb->s_flags & SB_POSIXACL)
1235
		fc->dont_mask = 1;
1236
	sb->s_flags |= SB_POSIXACL;
1237

1238 1239 1240 1241
	fc->default_permissions = ctx->default_permissions;
	fc->allow_other = ctx->allow_other;
	fc->user_id = ctx->user_id;
	fc->group_id = ctx->group_id;
1242
	fc->legacy_opts_show = ctx->legacy_opts_show;
1243
	fc->max_read = max_t(unsigned, 4096, ctx->max_read);
1244
	fc->destroy = ctx->destroy;
1245 1246
	fc->no_control = ctx->no_control;
	fc->no_force_umount = ctx->no_force_umount;
1247

M
Miklos Szeredi 已提交
1248
	err = -ENOMEM;
1249
	root = fuse_get_root_inode(sb, ctx->rootmode);
1250
	sb->s_d_op = &fuse_root_dentry_operations;
1251 1252
	root_dentry = d_make_root(root);
	if (!root_dentry)
1253
		goto err_dev_free;
1254
	/* Root dentry doesn't have .d_revalidate */
A
Al Viro 已提交
1255
	sb->s_d_op = &fuse_dentry_operations;
1256

1257
	mutex_lock(&fuse_mutex);
1258
	err = -EINVAL;
1259
	if (ctx->fudptr && *ctx->fudptr)
1260
		goto err_unlock;
1261

1262 1263 1264 1265 1266
	err = fuse_ctl_add_conn(fc);
	if (err)
		goto err_unlock;

	list_add_tail(&fc->entry, &fuse_conn_list);
1267
	sb->s_root = root_dentry;
1268 1269
	if (ctx->fudptr)
		*ctx->fudptr = fud;
1270
	mutex_unlock(&fuse_mutex);
1271 1272 1273 1274 1275 1276
	return 0;

 err_unlock:
	mutex_unlock(&fuse_mutex);
	dput(root_dentry);
 err_dev_free:
1277 1278
	if (fud)
		fuse_dev_free(fud);
1279 1280 1281
 err_free_dax:
	if (IS_ENABLED(CONFIG_FUSE_DAX))
		fuse_dax_conn_free(fc);
1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312
 err:
	return err;
}
EXPORT_SYMBOL_GPL(fuse_fill_super_common);

static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
{
	struct fuse_fs_context *ctx = fsc->fs_private;
	struct file *file;
	int err;
	struct fuse_conn *fc;

	err = -EINVAL;
	file = fget(ctx->fd);
	if (!file)
		goto err;

	/*
	 * Require mount to happen from the same user namespace which
	 * opened /dev/fuse to prevent potential attacks.
	 */
	if ((file->f_op != &fuse_dev_operations) ||
	    (file->f_cred->user_ns != sb->s_user_ns))
		goto err_fput;
	ctx->fudptr = &file->private_data;

	fc = kmalloc(sizeof(*fc), GFP_KERNEL);
	err = -ENOMEM;
	if (!fc)
		goto err_fput;

1313
	fuse_conn_init(fc, sb->s_user_ns, &fuse_dev_fiq_ops, NULL);
1314 1315 1316 1317 1318 1319
	fc->release = fuse_free_conn;
	sb->s_fs_info = fc;

	err = fuse_fill_super_common(sb, ctx);
	if (err)
		goto err_put_conn;
M
Miklos Szeredi 已提交
1320 1321 1322 1323 1324 1325
	/*
	 * atomic_dec_and_test() in fput() provides the necessary
	 * memory barrier for file->private_data to be visible on all
	 * CPUs after this
	 */
	fput(file);
1326
	fuse_send_init(get_fuse_conn_super(sb));
M
Miklos Szeredi 已提交
1327 1328
	return 0;

1329
 err_put_conn:
1330
	fuse_conn_put(fc);
1331
	sb->s_fs_info = NULL;
1332 1333 1334
 err_fput:
	fput(file);
 err:
M
Miklos Szeredi 已提交
1335 1336 1337
	return err;
}

1338
static int fuse_get_tree(struct fs_context *fc)
M
Miklos Szeredi 已提交
1339
{
1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356
	struct fuse_fs_context *ctx = fc->fs_private;

	if (!ctx->fd_present || !ctx->rootmode_present ||
	    !ctx->user_id_present || !ctx->group_id_present)
		return -EINVAL;

#ifdef CONFIG_BLOCK
	if (ctx->is_bdev)
		return get_tree_bdev(fc, fuse_fill_super);
#endif

	return get_tree_nodev(fc, fuse_fill_super);
}

static const struct fs_context_operations fuse_context_ops = {
	.free		= fuse_free_fc,
	.parse_param	= fuse_parse_param,
1357
	.reconfigure	= fuse_reconfigure,
1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373
	.get_tree	= fuse_get_tree,
};

/*
 * Set up the filesystem mount context.
 */
static int fuse_init_fs_context(struct fs_context *fc)
{
	struct fuse_fs_context *ctx;

	ctx = kzalloc(sizeof(struct fuse_fs_context), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	ctx->max_read = ~0;
	ctx->blksize = FUSE_DEFAULT_BLKSIZE;
1374
	ctx->legacy_opts_show = true;
1375 1376

#ifdef CONFIG_BLOCK
1377
	if (fc->fs_type == &fuseblk_fs_type) {
1378
		ctx->is_bdev = true;
1379 1380
		ctx->destroy = true;
	}
1381 1382 1383 1384 1385
#endif

	fc->fs_private = ctx;
	fc->ops = &fuse_context_ops;
	return 0;
M
Miklos Szeredi 已提交
1386 1387
}

1388
static void fuse_sb_destroy(struct super_block *sb)
J
John Muir 已提交
1389 1390 1391 1392
{
	struct fuse_conn *fc = get_fuse_conn_super(sb);

	if (fc) {
1393 1394
		if (fc->destroy)
			fuse_send_destroy(fc);
1395

M
Miklos Szeredi 已提交
1396
		fuse_abort_conn(fc);
1397 1398
		fuse_wait_aborted(fc);

J
John Muir 已提交
1399 1400 1401 1402
		down_write(&fc->killsb);
		fc->sb = NULL;
		up_write(&fc->killsb);
	}
1403
}
J
John Muir 已提交
1404

1405
/*
 * .kill_sb for the "fuse" filesystem type: shut down the fuse connection
 * first, then hand the superblock to kill_anon_super().
 */
void fuse_kill_sb_anon(struct super_block *sb)
{
	fuse_sb_destroy(sb);
	kill_anon_super(sb);
}
EXPORT_SYMBOL_GPL(fuse_kill_sb_anon);

static struct file_system_type fuse_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "fuse",
1415
	.fs_flags	= FS_HAS_SUBTYPE | FS_USERNS_MOUNT,
1416
	.init_fs_context = fuse_init_fs_context,
1417
	.parameters	= fuse_fs_parameters,
J
John Muir 已提交
1418
	.kill_sb	= fuse_kill_sb_anon,
1419
};
1420
MODULE_ALIAS_FS("fuse");
1421 1422

#ifdef CONFIG_BLOCK
/*
 * .kill_sb for the "fuseblk" filesystem type: shut down the fuse
 * connection first, then hand the superblock to kill_block_super().
 */
static void fuse_kill_sb_blk(struct super_block *sb)
{
	fuse_sb_destroy(sb);
	kill_block_super(sb);
}

/* The "fuseblk" filesystem type; only available with CONFIG_BLOCK */
static struct file_system_type fuseblk_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "fuseblk",
	.init_fs_context = fuse_init_fs_context,
	.parameters	= fuse_fs_parameters,
	.kill_sb	= fuse_kill_sb_blk,
	.fs_flags	= FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
};
MODULE_ALIAS_FS("fuseblk");

/* Register the "fuseblk" filesystem type; returns register_filesystem()'s result */
static inline int register_fuseblk(void)
{
	return register_filesystem(&fuseblk_fs_type);
}

/* Unregister the "fuseblk" filesystem type */
static inline void unregister_fuseblk(void)
{
	unregister_filesystem(&fuseblk_fs_type);
}
#else
/* Without CONFIG_BLOCK there is no fuseblk type: registration is a no-op */
static inline int register_fuseblk(void)
{
	return 0;
}

static inline void unregister_fuseblk(void)
{
}
#endif

/*
 * Constructor passed to kmem_cache_create() for the fuse inode cache:
 * runs inode_init_once() on the object, treated as a struct inode.
 */
static void fuse_inode_init_once(void *foo)
{
	struct inode *inode = foo;

	inode_init_once(inode);
}

/*
 * Create the fuse inode slab cache and register the "fuseblk" and "fuse"
 * filesystem types, in that order.
 *
 * Returns 0 on success or a negative errno; on failure the steps already
 * completed are undone in reverse order.
 */
static int __init fuse_fs_init(void)
{
	int err;

	fuse_inode_cachep = kmem_cache_create("fuse_inode",
			sizeof(struct fuse_inode), 0,
			SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT|SLAB_RECLAIM_ACCOUNT,
			fuse_inode_init_once);
	err = -ENOMEM;
	if (!fuse_inode_cachep)
		goto out;

	err = register_fuseblk();
	if (err)
		goto out2;

	err = register_filesystem(&fuse_fs_type);
	if (err)
		goto out3;

	return 0;

 out3:
	unregister_fuseblk();
 out2:
	kmem_cache_destroy(fuse_inode_cachep);
 out:
	return err;
}

static void fuse_fs_cleanup(void)
{
	unregister_filesystem(&fuse_fs_type);
1499
	unregister_fuseblk();
1500 1501 1502 1503 1504 1505

	/*
	 * Make sure all delayed rcu free inodes are flushed before we
	 * destroy cache.
	 */
	rcu_barrier();
M
Miklos Szeredi 已提交
1506 1507 1508
	kmem_cache_destroy(fuse_inode_cachep);
}

1509 1510
static struct kobject *fuse_kobj;

1511 1512 1513 1514
static int fuse_sysfs_init(void)
{
	int err;

1515
	fuse_kobj = kobject_create_and_add("fuse", fs_kobj);
1516 1517
	if (!fuse_kobj) {
		err = -ENOMEM;
1518
		goto out_err;
1519
	}
1520

1521 1522
	err = sysfs_create_mount_point(fuse_kobj, "connections");
	if (err)
1523 1524 1525 1526 1527
		goto out_fuse_unregister;

	return 0;

 out_fuse_unregister:
1528
	kobject_put(fuse_kobj);
1529 1530 1531 1532 1533 1534
 out_err:
	return err;
}

static void fuse_sysfs_cleanup(void)
{
1535
	sysfs_remove_mount_point(fuse_kobj, "connections");
1536
	kobject_put(fuse_kobj);
1537 1538
}

M
Miklos Szeredi 已提交
1539 1540 1541 1542
/*
 * Module init: bring up the filesystem types, the fuse device, the sysfs
 * entries and the control filesystem, in that order, then sanitize the
 * two global limit module parameters.
 *
 * Returns 0 on success or the first error encountered; the subsystems
 * already initialized are cleaned up in reverse order.
 */
static int __init fuse_init(void)
{
	int res;

	pr_info("init (API version %i.%i)\n",
		FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION);

	INIT_LIST_HEAD(&fuse_conn_list);
	res = fuse_fs_init();
	if (res)
		goto err;

	res = fuse_dev_init();
	if (res)
		goto err_fs_cleanup;

	res = fuse_sysfs_init();
	if (res)
		goto err_dev_cleanup;

	res = fuse_ctl_init();
	if (res)
		goto err_sysfs_cleanup;

	sanitize_global_limit(&max_user_bgreq);
	sanitize_global_limit(&max_user_congthresh);

	return 0;

 err_sysfs_cleanup:
	fuse_sysfs_cleanup();
 err_dev_cleanup:
	fuse_dev_cleanup();
 err_fs_cleanup:
	fuse_fs_cleanup();
 err:
	return res;
}

/*
 * Module exit: tear down the control filesystem, the sysfs entries, the
 * filesystem types and finally the fuse device.
 */
static void __exit fuse_exit(void)
{
	pr_debug("exit\n");

	fuse_ctl_cleanup();
	fuse_sysfs_cleanup();
	fuse_fs_cleanup();
	fuse_dev_cleanup();
}

/* Register fuse_init()/fuse_exit() as the module's init and exit handlers */
module_init(fuse_init);
module_exit(fuse_exit);