inode.c 35.8 KB
Newer Older
M
Miklos Szeredi 已提交
1 2
/*
  FUSE: Filesystem in Userspace
  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/

#include "fuse_i.h"

#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/module.h>
17
#include <linux/moduleparam.h>
18 19
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
M
Miklos Szeredi 已提交
20
#include <linux/statfs.h>
21
#include <linux/random.h>
A
Alexey Dobriyan 已提交
22
#include <linux/sched.h>
M
Miklos Szeredi 已提交
23
#include <linux/exportfs.h>
S
Seth Forshee 已提交
24
#include <linux/posix_acl.h>
25
#include <linux/pid_namespace.h>
M
Miklos Szeredi 已提交
26 27 28 29 30

MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
MODULE_DESCRIPTION("Filesystem in Userspace");
MODULE_LICENSE("GPL");

31
static struct kmem_cache *fuse_inode_cachep;
32 33
struct list_head fuse_conn_list;
DEFINE_MUTEX(fuse_mutex);
M
Miklos Szeredi 已提交
34

35
/* Forward declaration: setter used by the module parameters below. */
static int set_global_limit(const char *val, const struct kernel_param *kp);
36

37
unsigned max_user_bgreq;
38 39 40 41 42 43 44
module_param_call(max_user_bgreq, set_global_limit, param_get_uint,
		  &max_user_bgreq, 0644);
__MODULE_PARM_TYPE(max_user_bgreq, "uint");
MODULE_PARM_DESC(max_user_bgreq,
 "Global limit for the maximum number of backgrounded requests an "
 "unprivileged user can set");

45
unsigned max_user_congthresh;
46 47 48 49 50 51 52
module_param_call(max_user_congthresh, set_global_limit, param_get_uint,
		  &max_user_congthresh, 0644);
__MODULE_PARM_TYPE(max_user_congthresh, "uint");
MODULE_PARM_DESC(max_user_congthresh,
 "Global limit for the maximum congestion threshold an "
 "unprivileged user can set");

M
Miklos Szeredi 已提交
53 54
/** Value reported in kstatfs.f_type for fuse filesystems */
#define FUSE_SUPER_MAGIC 0x65735546

/** Block size that fuse_show_options() treats as the default (not printed) */
#define FUSE_DEFAULT_BLKSIZE 512

/** Maximum number of outstanding background requests */
#define FUSE_DEFAULT_MAX_BACKGROUND 12

/** Congestion starts at 75% of maximum */
#define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4)

63 64 65 66
/* Forward declaration; only present when block device support is built in. */
#ifdef CONFIG_BLOCK
static struct file_system_type fuseblk_fs_type;
#endif

67
struct fuse_forget_link *fuse_alloc_forget(void)
68
{
69
	return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL_ACCOUNT);
70 71
}

M
Miklos Szeredi 已提交
72 73 74 75
static struct inode *fuse_alloc_inode(struct super_block *sb)
{
	struct fuse_inode *fi;

Z
zhangliguang 已提交
76 77
	fi = kmem_cache_alloc(fuse_inode_cachep, GFP_KERNEL);
	if (!fi)
M
Miklos Szeredi 已提交
78 79
		return NULL;

M
Miklos Szeredi 已提交
80
	fi->i_time = 0;
81
	fi->inval_mask = 0;
M
Miklos Szeredi 已提交
82
	fi->nodeid = 0;
83
	fi->nlookup = 0;
84
	fi->attr_version = 0;
85
	fi->orig_ino = 0;
86
	fi->state = 0;
87
	mutex_init(&fi->mutex);
88
	spin_lock_init(&fi->lock);
89 90
	fi->forget = fuse_alloc_forget();
	if (!fi->forget) {
Z
zhangliguang 已提交
91
		kmem_cache_free(fuse_inode_cachep, fi);
92 93
		return NULL;
	}
M
Miklos Szeredi 已提交
94

Z
zhangliguang 已提交
95
	return &fi->inode;
M
Miklos Szeredi 已提交
96 97
}

A
Al Viro 已提交
98
static void fuse_free_inode(struct inode *inode)
M
Miklos Szeredi 已提交
99
{
100
	struct fuse_inode *fi = get_fuse_inode(inode);
A
Al Viro 已提交
101

102
	mutex_destroy(&fi->mutex);
103
	kfree(fi->forget);
A
Al Viro 已提交
104
	kmem_cache_free(fuse_inode_cachep, fi);
M
Miklos Szeredi 已提交
105 106
}

107
static void fuse_evict_inode(struct inode *inode)
M
Miklos Szeredi 已提交
108
{
A
Al Viro 已提交
109 110
	struct fuse_inode *fi = get_fuse_inode(inode);

111
	truncate_inode_pages_final(&inode->i_data);
112
	clear_inode(inode);
113
	if (inode->i_sb->s_flags & SB_ACTIVE) {
M
Miklos Szeredi 已提交
114
		struct fuse_conn *fc = get_fuse_conn(inode);
115 116
		fuse_queue_forget(fc, fi->forget, fi->nodeid, fi->nlookup);
		fi->forget = NULL;
117
	}
A
Al Viro 已提交
118 119 120 121
	if (S_ISREG(inode->i_mode) && !is_bad_inode(inode)) {
		WARN_ON(!list_empty(&fi->write_files));
		WARN_ON(!list_empty(&fi->queued_writes));
	}
M
Miklos Szeredi 已提交
122 123
}

124
static int fuse_reconfigure(struct fs_context *fc)
125
{
126 127
	struct super_block *sb = fc->root->d_sb;

128
	sync_filesystem(sb);
129
	if (fc->sb_flags & SB_MANDLOCK)
130 131 132 133 134
		return -EINVAL;

	return 0;
}

135 136 137 138 139 140 141 142 143 144 145 146
/*
 * ino_t is only 32 bits wide on 32-bit architectures, so a 64-bit inode
 * number has to be folded down until it fits.
 */
static ino_t fuse_squash_ino(u64 ino64)
{
	ino_t folded = (ino_t) ino64;

	/* Only when ino_t is narrower than u64: XOR in the bits cut off above */
	if (sizeof(ino_t) < sizeof(u64)) {
		const unsigned int shift = (sizeof(u64) - sizeof(ino_t)) * 8;

		folded ^= ino64 >> shift;
	}

	return folded;
}

M
Miklos Szeredi 已提交
147 148
/*
 * Copy the attributes in @attr into @inode and record @attr_valid as the
 * attribute timeout.  The inode size is not touched here.
 *
 * Must be called with fi->lock held (asserted via lockdep).  Bumps the
 * connection-wide attr_version and stores the new value in the inode.
 */
void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
				   u64 attr_valid)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);

	lockdep_assert_held(&fi->lock);

	fi->attr_version = atomic64_inc_return(&fc->attr_version);
	fi->i_time = attr_valid;
	WRITE_ONCE(fi->inval_mask, 0);

	inode->i_ino     = fuse_squash_ino(attr->ino);
	/* Keep the file type bits, take only the permission bits from attr */
	inode->i_mode    = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
	set_nlink(inode, attr->nlink);
	inode->i_uid     = make_kuid(fc->user_ns, attr->uid);
	inode->i_gid     = make_kgid(fc->user_ns, attr->gid);
	inode->i_blocks  = attr->blocks;
	inode->i_atime.tv_sec   = attr->atime;
	inode->i_atime.tv_nsec  = attr->atimensec;
	/* mtime from server may be stale due to local buffered write */
	if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) {
		inode->i_mtime.tv_sec   = attr->mtime;
		inode->i_mtime.tv_nsec  = attr->mtimensec;
		inode->i_ctime.tv_sec   = attr->ctime;
		inode->i_ctime.tv_nsec  = attr->ctimensec;
	}

	/* A zero blksize means "not provided": use the superblock's value */
	if (attr->blksize != 0)
		inode->i_blkbits = ilog2(attr->blksize);
	else
		inode->i_blkbits = inode->i_sb->s_blocksize_bits;

	/*
	 * Don't set the sticky bit in i_mode, unless we want the VFS
	 * to check permissions.  This prevents failures due to the
	 * check in may_delete().
	 */
	fi->orig_i_mode = inode->i_mode;
	if (!fc->default_permissions)
		inode->i_mode &= ~S_ISVTX;

	fi->orig_ino = attr->ino;
}

/*
 * Update @inode from @attr, including the size, and invalidate cached
 * pages where needed.
 *
 * The update is skipped entirely when @attr_version is non-zero and older
 * than the inode's current attr_version, or when FUSE_I_SIZE_UNSTABLE is
 * set in fi->state.
 */
void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
			    u64 attr_valid, u64 attr_version)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	bool is_wb = fc->writeback_cache;
	loff_t oldsize;
	struct timespec64 old_mtime;

	spin_lock(&fi->lock);
	if ((attr_version != 0 && fi->attr_version > attr_version) ||
	    test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) {
		spin_unlock(&fi->lock);
		return;
	}

	/* Remember the previous mtime for the auto-invalidate check below */
	old_mtime = inode->i_mtime;
	fuse_change_attributes_common(inode, attr, attr_valid);

	oldsize = inode->i_size;
	/*
	 * In case of writeback_cache enabled, the cached writes beyond EOF
	 * extend local i_size without keeping userspace server in sync. So,
	 * attr->size coming from server can be stale. We cannot trust it.
	 */
	if (!is_wb || !S_ISREG(inode->i_mode))
		i_size_write(inode, attr->size);
	spin_unlock(&fi->lock);

	if (!is_wb && S_ISREG(inode->i_mode)) {
		bool inval = false;

		if (oldsize != attr->size) {
			truncate_pagecache(inode, attr->size);
			if (!fc->explicit_inval_data)
				inval = true;
		} else if (fc->auto_inval_data) {
			struct timespec64 new_mtime = {
				.tv_sec = attr->mtime,
				.tv_nsec = attr->mtimensec,
			};

			/*
			 * Auto inval mode also checks and invalidates if mtime
			 * has changed.
			 */
			if (!timespec64_equal(&old_mtime, &new_mtime))
				inval = true;
		}

		if (inval)
			invalidate_inode_pages2(inode->i_mapping);
	}
}

/*
 * Initialize a freshly created inode from @attr: file type, size,
 * mtime/ctime, and the type-specific setup (regular file, directory,
 * symlink, or special file).
 *
 * Any other file type hits BUG(), so the mode in @attr must already be one
 * of the types handled here.
 */
static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
{
	inode->i_mode = attr->mode & S_IFMT;
	inode->i_size = attr->size;
	inode->i_mtime.tv_sec  = attr->mtime;
	inode->i_mtime.tv_nsec = attr->mtimensec;
	inode->i_ctime.tv_sec  = attr->ctime;
	inode->i_ctime.tv_nsec = attr->ctimensec;
	if (S_ISREG(inode->i_mode)) {
		fuse_init_common(inode);
		fuse_init_file_inode(inode);
	} else if (S_ISDIR(inode->i_mode))
		fuse_init_dir(inode);
	else if (S_ISLNK(inode->i_mode))
		fuse_init_symlink(inode);
	else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
		 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
		fuse_init_common(inode);
		init_special_inode(inode, inode->i_mode,
				   new_decode_dev(attr->rdev));
	} else
		BUG();
}

J
John Muir 已提交
271
int fuse_inode_eq(struct inode *inode, void *_nodeidp)
M
Miklos Szeredi 已提交
272
{
M
Miklos Szeredi 已提交
273
	u64 nodeid = *(u64 *) _nodeidp;
M
Miklos Szeredi 已提交
274 275 276 277 278 279 280 281
	if (get_node_id(inode) == nodeid)
		return 1;
	else
		return 0;
}

static int fuse_inode_set(struct inode *inode, void *_nodeidp)
{
M
Miklos Szeredi 已提交
282
	u64 nodeid = *(u64 *) _nodeidp;
M
Miklos Szeredi 已提交
283 284 285 286
	get_fuse_inode(inode)->nodeid = nodeid;
	return 0;
}

M
Miklos Szeredi 已提交
287
/*
 * Look up or create the inode for @nodeid on @sb, bump its lookup count
 * and refresh its attributes from @attr.
 *
 * A new inode is initialized from @attr and @generation.  If an existing
 * inode's file type differs from @attr->mode it is marked bad and dropped,
 * and the lookup is retried.
 *
 * Returns the inode, or NULL if iget5_locked() fails.
 */
struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
			int generation, struct fuse_attr *attr,
			u64 attr_valid, u64 attr_version)
{
	struct inode *inode;
	struct fuse_inode *fi;
	struct fuse_conn *fc = get_fuse_conn_super(sb);

 retry:
	inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set, &nodeid);
	if (!inode)
		return NULL;

	if ((inode->i_state & I_NEW)) {
		inode->i_flags |= S_NOATIME;
		if (!fc->writeback_cache || !S_ISREG(attr->mode))
			inode->i_flags |= S_NOCMTIME;
		inode->i_generation = generation;
		fuse_init_inode(inode, attr);
		unlock_new_inode(inode);
	} else if ((inode->i_mode ^ attr->mode) & S_IFMT) {
		/* Inode has changed type, any I/O on the old should fail */
		make_bad_inode(inode);
		iput(inode);
		goto retry;
	}

	fi = get_fuse_inode(inode);
	spin_lock(&fi->lock);
	fi->nlookup++;
	spin_unlock(&fi->lock);
	fuse_change_attributes(inode, attr, attr_valid, attr_version);

	return inode;
}

J
John Muir 已提交
323 324 325
/*
 * Invalidate cached attributes, cached ACLs and (optionally) cached data
 * of the inode with node id @nodeid.
 *
 * Data is invalidated only when @offset >= 0.  The range starts at the
 * page containing @offset; with @len <= 0 the end page index is set to -1,
 * otherwise it is the page containing the last byte of the range.
 *
 * Returns 0 on success, -ENOENT if no such inode is currently cached.
 */
int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
			     loff_t offset, loff_t len)
{
	struct fuse_conn *fc = get_fuse_conn_super(sb);
	struct fuse_inode *fi;
	struct inode *inode;
	pgoff_t pg_start;
	pgoff_t pg_end;

	inode = ilookup5(sb, nodeid, fuse_inode_eq, &nodeid);
	if (!inode)
		return -ENOENT;

	/* Take a fresh attr_version for this inode */
	fi = get_fuse_inode(inode);
	spin_lock(&fi->lock);
	fi->attr_version = atomic64_inc_return(&fc->attr_version);
	spin_unlock(&fi->lock);

	fuse_invalidate_attr(inode);
	forget_all_cached_acls(inode);
	if (offset >= 0) {
		pg_start = offset >> PAGE_SHIFT;
		if (len <= 0)
			pg_end = -1;
		else
			pg_end = (offset + len - 1) >> PAGE_SHIFT;
		invalidate_inode_pages2_range(inode->i_mapping,
					      pg_start, pg_end);
	}
	iput(inode);
	return 0;
}

356
bool fuse_lock_inode(struct inode *inode)
357
{
358 359 360
	bool locked = false;

	if (!get_fuse_conn(inode)->parallel_dirops) {
361
		mutex_lock(&get_fuse_inode(inode)->mutex);
362 363 364 365
		locked = true;
	}

	return locked;
366 367
}

368
/*
 * Counterpart of fuse_lock_inode(): release the per-inode mutex if
 * @locked says it was taken.
 */
void fuse_unlock_inode(struct inode *inode, bool locked)
{
	if (locked)
		mutex_unlock(&get_fuse_inode(inode)->mutex);
}

374
static void fuse_umount_begin(struct super_block *sb)
375
{
376 377 378 379
	struct fuse_conn *fc = get_fuse_conn_super(sb);

	if (!fc->no_force_umount)
		fuse_abort_conn(fc);
380 381
}

382 383
static void fuse_send_destroy(struct fuse_conn *fc)
{
384 385 386 387 388 389 390
	if (fc->conn_init) {
		FUSE_ARGS(args);

		args.opcode = FUSE_DESTROY;
		args.force = true;
		args.nocreds = true;
		fuse_simple_request(fc, &args);
391 392 393
	}
}

394 395 396 397
/*
 * super_operations.put_super: unlink the connection from the connection
 * list and the control filesystem under fuse_mutex, then drop the
 * superblock's reference to it.
 */
static void fuse_put_super(struct super_block *sb)
{
	struct fuse_conn *fc = get_fuse_conn_super(sb);

	mutex_lock(&fuse_mutex);
	list_del(&fc->entry);
	fuse_ctl_remove_conn(fc);
	mutex_unlock(&fuse_mutex);

	fuse_conn_put(fc);
}

406 407 408 409
/*
 * Fill @stbuf from the statfs reply in @attr.  f_type is always
 * FUSE_SUPER_MAGIC; f_fsid is not written here.
 */
static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr)
{
	stbuf->f_type    = FUSE_SUPER_MAGIC;
	stbuf->f_bsize   = attr->bsize;
	stbuf->f_frsize  = attr->frsize;
	stbuf->f_blocks  = attr->blocks;
	stbuf->f_bfree   = attr->bfree;
	stbuf->f_bavail  = attr->bavail;
	stbuf->f_files   = attr->files;
	stbuf->f_ffree   = attr->ffree;
	stbuf->f_namelen = attr->namelen;
	/* fsid is left zero */
}

420
static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
421
{
422
	struct super_block *sb = dentry->d_sb;
423
	struct fuse_conn *fc = get_fuse_conn_super(sb);
424
	FUSE_ARGS(args);
425 426 427
	struct fuse_statfs_out outarg;
	int err;

428
	if (!fuse_allow_current_process(fc)) {
M
Miklos Szeredi 已提交
429 430 431 432
		buf->f_type = FUSE_SUPER_MAGIC;
		return 0;
	}

433
	memset(&outarg, 0, sizeof(outarg));
434 435 436 437 438 439
	args.in_numargs = 0;
	args.opcode = FUSE_STATFS;
	args.nodeid = get_node_id(d_inode(dentry));
	args.out_numargs = 1;
	args.out_args[0].size = sizeof(outarg);
	args.out_args[0].value = &outarg;
440
	err = fuse_simple_request(fc, &args);
441 442 443 444 445
	if (!err)
		convert_fuse_statfs(buf, &outarg.st);
	return err;
}

M
Miklos Szeredi 已提交
446
/* Mount option identifiers; matched to names in fuse_fs_parameters[]. */
enum {
	OPT_SOURCE,
	OPT_SUBTYPE,
	OPT_FD,
	OPT_ROOTMODE,
	OPT_USER_ID,
	OPT_GROUP_ID,
	OPT_DEFAULT_PERMISSIONS,
	OPT_ALLOW_OTHER,
	OPT_MAX_READ,
	OPT_BLKSIZE,
	OPT_ERR
};

460
static const struct fs_parameter_spec fuse_fs_parameters[] = {
461 462 463 464 465 466 467 468 469
	fsparam_string	("source",		OPT_SOURCE),
	fsparam_u32	("fd",			OPT_FD),
	fsparam_u32oct	("rootmode",		OPT_ROOTMODE),
	fsparam_u32	("user_id",		OPT_USER_ID),
	fsparam_u32	("group_id",		OPT_GROUP_ID),
	fsparam_flag	("default_permissions",	OPT_DEFAULT_PERMISSIONS),
	fsparam_flag	("allow_other",		OPT_ALLOW_OTHER),
	fsparam_u32	("max_read",		OPT_MAX_READ),
	fsparam_u32	("blksize",		OPT_BLKSIZE),
470
	fsparam_string	("subtype",		OPT_SUBTYPE),
471 472 473 474
	{}
};

static int fuse_parse_param(struct fs_context *fc, struct fs_parameter *param)
475
{
476 477 478 479
	struct fs_parse_result result;
	struct fuse_fs_context *ctx = fc->fs_private;
	int opt;

480 481 482 483 484 485 486 487 488 489
	if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
		/*
		 * Ignore options coming from mount(MS_REMOUNT) for backward
		 * compatibility.
		 */
		if (fc->oldapi)
			return 0;

		return invalfc(fc, "No changes allowed in reconfigure");
	}
490

491
	opt = fs_parse(fc, fuse_fs_parameters, param, &result);
492 493 494 495 496 497
	if (opt < 0)
		return opt;

	switch (opt) {
	case OPT_SOURCE:
		if (fc->source)
A
Al Viro 已提交
498
			return invalfc(fc, "Multiple sources specified");
499 500 501 502 503 504
		fc->source = param->string;
		param->string = NULL;
		break;

	case OPT_SUBTYPE:
		if (ctx->subtype)
A
Al Viro 已提交
505
			return invalfc(fc, "Multiple subtypes specified");
506 507 508 509 510 511
		ctx->subtype = param->string;
		param->string = NULL;
		return 0;

	case OPT_FD:
		ctx->fd = result.uint_32;
512
		ctx->fd_present = true;
513 514 515 516
		break;

	case OPT_ROOTMODE:
		if (!fuse_valid_type(result.uint_32))
A
Al Viro 已提交
517
			return invalfc(fc, "Invalid rootmode");
518
		ctx->rootmode = result.uint_32;
519
		ctx->rootmode_present = true;
520 521 522 523 524
		break;

	case OPT_USER_ID:
		ctx->user_id = make_kuid(fc->user_ns, result.uint_32);
		if (!uid_valid(ctx->user_id))
A
Al Viro 已提交
525
			return invalfc(fc, "Invalid user_id");
526
		ctx->user_id_present = true;
527 528 529 530 531
		break;

	case OPT_GROUP_ID:
		ctx->group_id = make_kgid(fc->user_ns, result.uint_32);
		if (!gid_valid(ctx->group_id))
A
Al Viro 已提交
532
			return invalfc(fc, "Invalid group_id");
533
		ctx->group_id_present = true;
534 535 536
		break;

	case OPT_DEFAULT_PERMISSIONS:
537
		ctx->default_permissions = true;
538 539 540
		break;

	case OPT_ALLOW_OTHER:
541
		ctx->allow_other = true;
542 543 544 545 546 547 548 549
		break;

	case OPT_MAX_READ:
		ctx->max_read = result.uint_32;
		break;

	case OPT_BLKSIZE:
		if (!ctx->is_bdev)
A
Al Viro 已提交
550
			return invalfc(fc, "blksize only supported for fuseblk");
551 552 553 554 555
		ctx->blksize = result.uint_32;
		break;

	default:
		return -EINVAL;
556
	}
557 558

	return 0;
559 560
}

561
static void fuse_free_fc(struct fs_context *fc)
M
Miklos Szeredi 已提交
562
{
563
	struct fuse_fs_context *ctx = fc->fs_private;
564

565 566 567 568
	if (ctx) {
		kfree(ctx->subtype);
		kfree(ctx);
	}
M
Miklos Szeredi 已提交
569 570
}

571
static int fuse_show_options(struct seq_file *m, struct dentry *root)
M
Miklos Szeredi 已提交
572
{
573 574
	struct super_block *sb = root->d_sb;
	struct fuse_conn *fc = get_fuse_conn_super(sb);
M
Miklos Szeredi 已提交
575

576 577 578
	if (fc->no_mount_options)
		return 0;

579 580
	seq_printf(m, ",user_id=%u", from_kuid_munged(fc->user_ns, fc->user_id));
	seq_printf(m, ",group_id=%u", from_kgid_munged(fc->user_ns, fc->group_id));
M
Miklos Szeredi 已提交
581
	if (fc->default_permissions)
M
Miklos Szeredi 已提交
582
		seq_puts(m, ",default_permissions");
M
Miklos Szeredi 已提交
583
	if (fc->allow_other)
M
Miklos Szeredi 已提交
584
		seq_puts(m, ",allow_other");
585 586
	if (fc->max_read != ~0)
		seq_printf(m, ",max_read=%u", fc->max_read);
587 588
	if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE)
		seq_printf(m, ",blksize=%lu", sb->s_blocksize);
M
Miklos Szeredi 已提交
589 590 591
	return 0;
}

592 593 594
static void fuse_iqueue_init(struct fuse_iqueue *fiq,
			     const struct fuse_iqueue_ops *ops,
			     void *priv)
M
Miklos Szeredi 已提交
595 596
{
	memset(fiq, 0, sizeof(struct fuse_iqueue));
597
	spin_lock_init(&fiq->lock);
M
Miklos Szeredi 已提交
598 599 600 601
	init_waitqueue_head(&fiq->waitq);
	INIT_LIST_HEAD(&fiq->pending);
	INIT_LIST_HEAD(&fiq->interrupts);
	fiq->forget_list_tail = &fiq->forget_list_head;
602
	fiq->connected = 1;
603 604
	fiq->ops = ops;
	fiq->priv = priv;
M
Miklos Szeredi 已提交
605 606
}

607 608
static void fuse_pqueue_init(struct fuse_pqueue *fpq)
{
609 610
	unsigned int i;

M
Miklos Szeredi 已提交
611
	spin_lock_init(&fpq->lock);
612 613
	for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
		INIT_LIST_HEAD(&fpq->processing[i]);
614
	INIT_LIST_HEAD(&fpq->io);
615
	fpq->connected = 1;
616 617
}

618 619
void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns,
		    const struct fuse_iqueue_ops *fiq_ops, void *fiq_priv)
M
Miklos Szeredi 已提交
620
{
621 622
	memset(fc, 0, sizeof(*fc));
	spin_lock_init(&fc->lock);
K
Kirill Tkhai 已提交
623
	spin_lock_init(&fc->bg_lock);
J
John Muir 已提交
624
	init_rwsem(&fc->killsb);
625
	refcount_set(&fc->count, 1);
626
	atomic_set(&fc->dev_count, 1);
627
	init_waitqueue_head(&fc->blocked_waitq);
628
	fuse_iqueue_init(&fc->iq, fiq_ops, fiq_priv);
629 630
	INIT_LIST_HEAD(&fc->bg_queue);
	INIT_LIST_HEAD(&fc->entry);
631
	INIT_LIST_HEAD(&fc->devices);
632
	atomic_set(&fc->num_waiting, 0);
633 634
	fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND;
	fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD;
M
Miklos Szeredi 已提交
635
	atomic64_set(&fc->khctr, 0);
636
	fc->polled_files = RB_ROOT;
637
	fc->blocked = 0;
M
Maxim Patlasov 已提交
638
	fc->initialized = 0;
639
	fc->connected = 1;
640
	atomic64_set(&fc->attr_version, 1);
641
	get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
642
	fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
643
	fc->user_ns = get_user_ns(user_ns);
M
Miklos Szeredi 已提交
644
	fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
M
Miklos Szeredi 已提交
645
}
646
EXPORT_SYMBOL_GPL(fuse_conn_init);
M
Miklos Szeredi 已提交
647

648 649
void fuse_conn_put(struct fuse_conn *fc)
{
650
	if (refcount_dec_and_test(&fc->count)) {
651 652 653 654
		struct fuse_iqueue *fiq = &fc->iq;

		if (fiq->ops->release)
			fiq->ops->release(fiq);
655
		put_pid_ns(fc->pid_ns);
656
		put_user_ns(fc->user_ns);
T
Tejun Heo 已提交
657
		fc->release(fc);
658
	}
659
}
660
EXPORT_SYMBOL_GPL(fuse_conn_put);
661 662 663

struct fuse_conn *fuse_conn_get(struct fuse_conn *fc)
{
664
	refcount_inc(&fc->count);
665 666
	return fc;
}
667
EXPORT_SYMBOL_GPL(fuse_conn_get);
668

669
static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode)
M
Miklos Szeredi 已提交
670 671 672 673 674 675
{
	struct fuse_attr attr;
	memset(&attr, 0, sizeof(attr));

	attr.mode = mode;
	attr.ino = FUSE_ROOT_ID;
676
	attr.nlink = 1;
677
	return fuse_iget(sb, 1, 0, &attr, 0, 0);
M
Miklos Szeredi 已提交
678 679
}

M
Miklos Szeredi 已提交
680
struct fuse_inode_handle {
M
Miklos Szeredi 已提交
681 682 683 684 685 686 687
	u64 nodeid;
	u32 generation;
};

static struct dentry *fuse_get_dentry(struct super_block *sb,
				      struct fuse_inode_handle *handle)
{
688
	struct fuse_conn *fc = get_fuse_conn_super(sb);
M
Miklos Szeredi 已提交
689 690 691 692 693 694 695 696
	struct inode *inode;
	struct dentry *entry;
	int err = -ESTALE;

	if (handle->nodeid == 0)
		goto out_err;

	inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid);
697 698
	if (!inode) {
		struct fuse_entry_out outarg;
A
Al Viro 已提交
699
		const struct qstr name = QSTR_INIT(".", 1);
700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715

		if (!fc->export_support)
			goto out_err;

		err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg,
				       &inode);
		if (err && err != -ENOENT)
			goto out_err;
		if (err || !inode) {
			err = -ESTALE;
			goto out_err;
		}
		err = -EIO;
		if (get_node_id(inode) != handle->nodeid)
			goto out_iput;
	}
M
Miklos Szeredi 已提交
716 717 718 719
	err = -ESTALE;
	if (inode->i_generation != handle->generation)
		goto out_iput;

720
	entry = d_obtain_alias(inode);
A
Al Viro 已提交
721
	if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID)
M
Miklos Szeredi 已提交
722 723 724 725 726 727 728 729 730 731
		fuse_invalidate_entry_cache(entry);

	return entry;

 out_iput:
	iput(inode);
 out_err:
	return ERR_PTR(err);
}

A
Al Viro 已提交
732 733
/*
 * export_operations.encode_fh: encode @inode (and @parent, if given) into
 * the file handle buffer @fh.
 *
 * Layout, in 32-bit words: node id high, node id low, generation; the same
 * three words for the parent follow when @parent is non-NULL.
 *
 * @max_len holds the buffer size in words on entry and is set to the number
 * of words needed (3 or 6).  Returns 0x81 for a handle without parent, 0x82
 * with parent, or FILEID_INVALID if the buffer is too small.
 */
static int fuse_encode_fh(struct inode *inode, u32 *fh, int *max_len,
			   struct inode *parent)
{
	int len = parent ? 6 : 3;
	u64 nodeid;
	u32 generation;

	if (*max_len < len) {
		*max_len = len;
		return FILEID_INVALID;
	}

	nodeid = get_fuse_inode(inode)->nodeid;
	generation = inode->i_generation;

	fh[0] = (u32)(nodeid >> 32);
	fh[1] = (u32)(nodeid & 0xffffffff);
	fh[2] = generation;

	if (parent) {
		nodeid = get_fuse_inode(parent)->nodeid;
		generation = parent->i_generation;

		fh[3] = (u32)(nodeid >> 32);
		fh[4] = (u32)(nodeid & 0xffffffff);
		fh[5] = generation;
	}

	*max_len = len;
	return parent ? 0x82 : 0x81;
}

static struct dentry *fuse_fh_to_dentry(struct super_block *sb,
		struct fid *fid, int fh_len, int fh_type)
{
	struct fuse_inode_handle handle;

	if ((fh_type != 0x81 && fh_type != 0x82) || fh_len < 3)
		return NULL;

	handle.nodeid = (u64) fid->raw[0] << 32;
	handle.nodeid |= (u64) fid->raw[1];
	handle.generation = fid->raw[2];
	return fuse_get_dentry(sb, &handle);
}

static struct dentry *fuse_fh_to_parent(struct super_block *sb,
		struct fid *fid, int fh_len, int fh_type)
{
	struct fuse_inode_handle parent;

	if (fh_type != 0x82 || fh_len < 6)
		return NULL;

	parent.nodeid = (u64) fid->raw[3] << 32;
	parent.nodeid |= (u64) fid->raw[4];
	parent.generation = fid->raw[5];
	return fuse_get_dentry(sb, &parent);
}

792 793
static struct dentry *fuse_get_parent(struct dentry *child)
{
794
	struct inode *child_inode = d_inode(child);
795 796 797 798
	struct fuse_conn *fc = get_fuse_conn(child_inode);
	struct inode *inode;
	struct dentry *parent;
	struct fuse_entry_out outarg;
A
Al Viro 已提交
799
	const struct qstr name = QSTR_INIT("..", 2);
800 801 802 803 804 805 806
	int err;

	if (!fc->export_support)
		return ERR_PTR(-ESTALE);

	err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode),
			       &name, &outarg, &inode);
807 808 809
	if (err) {
		if (err == -ENOENT)
			return ERR_PTR(-ESTALE);
810 811
		return ERR_PTR(err);
	}
812 813

	parent = d_obtain_alias(inode);
A
Al Viro 已提交
814
	if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID)
815 816 817 818
		fuse_invalidate_entry_cache(parent);

	return parent;
}
M
Miklos Szeredi 已提交
819 820 821 822 823

static const struct export_operations fuse_export_operations = {
	.fh_to_dentry	= fuse_fh_to_dentry,
	.fh_to_parent	= fuse_fh_to_parent,
	.encode_fh	= fuse_encode_fh,
824
	.get_parent	= fuse_get_parent,
M
Miklos Szeredi 已提交
825 826
};

827
static const struct super_operations fuse_super_operations = {
M
Miklos Szeredi 已提交
828
	.alloc_inode    = fuse_alloc_inode,
A
Al Viro 已提交
829
	.free_inode     = fuse_free_inode,
830
	.evict_inode	= fuse_evict_inode,
M
Miklos Szeredi 已提交
831
	.write_inode	= fuse_write_inode,
M
Miklos Szeredi 已提交
832
	.drop_inode	= generic_delete_inode,
M
Miklos Szeredi 已提交
833
	.put_super	= fuse_put_super,
834
	.umount_begin	= fuse_umount_begin,
835
	.statfs		= fuse_statfs,
M
Miklos Szeredi 已提交
836 837 838
	.show_options	= fuse_show_options,
};

839 840
static void sanitize_global_limit(unsigned *limit)
{
M
Miklos Szeredi 已提交
841 842 843 844
	/*
	 * The default maximum number of async requests is calculated to consume
	 * 1/2^13 of the total memory, assuming 392 bytes per request.
	 */
845
	if (*limit == 0)
M
Miklos Szeredi 已提交
846
		*limit = ((totalram_pages() << PAGE_SHIFT) >> 13) / 392;
847 848 849 850 851

	if (*limit >= 1 << 16)
		*limit = (1 << 16) - 1;
}

852
static int set_global_limit(const char *val, const struct kernel_param *kp)
853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874
{
	int rv;

	rv = param_set_uint(val, kp);
	if (rv)
		return rv;

	sanitize_global_limit((unsigned *)kp->arg);

	return 0;
}

static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg)
{
	int cap_sys_admin = capable(CAP_SYS_ADMIN);

	if (arg->minor < 13)
		return;

	sanitize_global_limit(&max_user_bgreq);
	sanitize_global_limit(&max_user_congthresh);

K
Kirill Tkhai 已提交
875
	spin_lock(&fc->bg_lock);
876 877 878 879 880 881 882 883 884 885 886 887 888
	if (arg->max_background) {
		fc->max_background = arg->max_background;

		if (!cap_sys_admin && fc->max_background > max_user_bgreq)
			fc->max_background = max_user_bgreq;
	}
	if (arg->congestion_threshold) {
		fc->congestion_threshold = arg->congestion_threshold;

		if (!cap_sys_admin &&
		    fc->congestion_threshold > max_user_congthresh)
			fc->congestion_threshold = max_user_congthresh;
	}
K
Kirill Tkhai 已提交
889
	spin_unlock(&fc->bg_lock);
890 891
}

892 893 894 895 896 897 898 899
/*
 * Everything needed for one FUSE_INIT exchange, allocated as a single
 * object by fuse_send_init() and freed by process_init_reply().
 */
struct fuse_init_args {
	struct fuse_args args;		/* request descriptor; container_of() anchor */
	struct fuse_init_in in;		/* INIT request payload */
	struct fuse_init_out out;	/* INIT reply payload */
};

static void process_init_reply(struct fuse_conn *fc, struct fuse_args *args,
			       int error)
900
{
901 902
	struct fuse_init_args *ia = container_of(args, typeof(*ia), args);
	struct fuse_init_out *arg = &ia->out;
903

904
	if (error || arg->major != FUSE_KERNEL_VERSION)
905 906
		fc->conn_error = 1;
	else {
907 908
		unsigned long ra_pages;

909 910
		process_init_limits(fc, arg);

911
		if (arg->minor >= 6) {
912
			ra_pages = arg->max_readahead / PAGE_SIZE;
913 914
			if (arg->flags & FUSE_ASYNC_READ)
				fc->async_read = 1;
915 916
			if (!(arg->flags & FUSE_POSIX_LOCKS))
				fc->no_lock = 1;
M
Miklos Szeredi 已提交
917 918 919
			if (arg->minor >= 17) {
				if (!(arg->flags & FUSE_FLOCK_LOCKS))
					fc->no_flock = 1;
M
Miklos Szeredi 已提交
920 921 922
			} else {
				if (!(arg->flags & FUSE_POSIX_LOCKS))
					fc->no_flock = 1;
M
Miklos Szeredi 已提交
923
			}
924 925
			if (arg->flags & FUSE_ATOMIC_O_TRUNC)
				fc->atomic_o_trunc = 1;
926 927 928 929 930
			if (arg->minor >= 9) {
				/* LOOKUP has dependency on proto version */
				if (arg->flags & FUSE_EXPORT_SUPPORT)
					fc->export_support = 1;
			}
931 932
			if (arg->flags & FUSE_BIG_WRITES)
				fc->big_writes = 1;
933 934
			if (arg->flags & FUSE_DONT_MASK)
				fc->dont_mask = 1;
935 936
			if (arg->flags & FUSE_AUTO_INVAL_DATA)
				fc->auto_inval_data = 1;
937 938
			else if (arg->flags & FUSE_EXPLICIT_INVAL_DATA)
				fc->explicit_inval_data = 1;
939
			if (arg->flags & FUSE_DO_READDIRPLUS) {
940
				fc->do_readdirplus = 1;
941 942 943
				if (arg->flags & FUSE_READDIRPLUS_AUTO)
					fc->readdirplus_auto = 1;
			}
944 945
			if (arg->flags & FUSE_ASYNC_DIO)
				fc->async_dio = 1;
P
Pavel Emelyanov 已提交
946 947
			if (arg->flags & FUSE_WRITEBACK_CACHE)
				fc->writeback_cache = 1;
948 949
			if (arg->flags & FUSE_PARALLEL_DIROPS)
				fc->parallel_dirops = 1;
950 951
			if (arg->flags & FUSE_HANDLE_KILLPRIV)
				fc->handle_killpriv = 1;
952 953
			if (arg->time_gran && arg->time_gran <= 1000000000)
				fc->sb->s_time_gran = arg->time_gran;
S
Seth Forshee 已提交
954
			if ((arg->flags & FUSE_POSIX_ACL)) {
M
Miklos Szeredi 已提交
955
				fc->default_permissions = 1;
S
Seth Forshee 已提交
956 957 958
				fc->posix_acl = 1;
				fc->sb->s_xattr = fuse_acl_xattr_handlers;
			}
D
Dan Schatzberg 已提交
959 960
			if (arg->flags & FUSE_CACHE_SYMLINKS)
				fc->cache_symlinks = 1;
961 962
			if (arg->flags & FUSE_ABORT_ERROR)
				fc->abort_err = 1;
963 964 965 966 967
			if (arg->flags & FUSE_MAX_PAGES) {
				fc->max_pages =
					min_t(unsigned int, FUSE_MAX_MAX_PAGES,
					max_t(unsigned int, arg->max_pages, 1));
			}
968
		} else {
969
			ra_pages = fc->max_read / PAGE_SIZE;
970
			fc->no_lock = 1;
M
Miklos Szeredi 已提交
971
			fc->no_flock = 1;
972
		}
973

974 975
		fc->sb->s_bdi->ra_pages =
				min(fc->sb->s_bdi->ra_pages, ra_pages);
976 977
		fc->minor = arg->minor;
		fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
978
		fc->max_write = max_t(unsigned, 4096, fc->max_write);
979
		fc->conn_init = 1;
980
	}
981 982
	kfree(ia);

983
	fuse_set_initialized(fc);
984
	wake_up_all(&fc->blocked_waitq);
985 986
}

987
void fuse_send_init(struct fuse_conn *fc)
988
{
989
	struct fuse_init_args *ia;
M
Miklos Szeredi 已提交
990

991 992 993 994 995 996 997
	ia = kzalloc(sizeof(*ia), GFP_KERNEL | __GFP_NOFAIL);

	ia->in.major = FUSE_KERNEL_VERSION;
	ia->in.minor = FUSE_KERNEL_MINOR_VERSION;
	ia->in.max_readahead = fc->sb->s_bdi->ra_pages * PAGE_SIZE;
	ia->in.flags |=
		FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
M
Miklos Szeredi 已提交
998
		FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
M
Miklos Szeredi 已提交
999
		FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
1000
		FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
P
Pavel Emelyanov 已提交
1001
		FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
1002
		FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
1003
		FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |
1004
		FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS |
1005
		FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA;
1006 1007 1008 1009 1010
	ia->args.opcode = FUSE_INIT;
	ia->args.in_numargs = 1;
	ia->args.in_args[0].size = sizeof(ia->in);
	ia->args.in_args[0].value = &ia->in;
	ia->args.out_numargs = 1;
D
Daniel Mack 已提交
1011
	/* Variable length argument used for backward compatibility
1012 1013
	   with interface version < 7.5.  Rest of init_out is zeroed
	   by do_get_request(), so a short reply is not a problem */
1014
	ia->args.out_argvar = true;
1015 1016 1017 1018 1019 1020 1021 1022
	ia->args.out_args[0].size = sizeof(ia->out);
	ia->args.out_args[0].value = &ia->out;
	ia->args.force = true;
	ia->args.nocreds = true;
	ia->args.end = process_init_reply;

	if (fuse_simple_background(fc, &ia->args, GFP_KERNEL) != 0)
		process_init_reply(fc, &ia->args, -ENOTCONN);
1023
}
1024
EXPORT_SYMBOL_GPL(fuse_send_init);
1025

1026
void fuse_free_conn(struct fuse_conn *fc)
T
Tejun Heo 已提交
1027
{
1028
	WARN_ON(!list_empty(&fc->devices));
A
Al Viro 已提交
1029
	kfree_rcu(fc, rcu);
T
Tejun Heo 已提交
1030
}
1031
EXPORT_SYMBOL_GPL(fuse_free_conn);
T
Tejun Heo 已提交
1032

1033 1034 1035
static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
{
	int err;
1036
	char *suffix = "";
1037

1038
	if (sb->s_bdev) {
1039
		suffix = "-fuseblk";
1040 1041 1042 1043 1044 1045 1046
		/*
		 * sb->s_bdi points to blkdev's bdi however we want to redirect
		 * it to our private bdi...
		 */
		bdi_put(sb->s_bdi);
		sb->s_bdi = &noop_backing_dev_info;
	}
1047 1048
	err = super_setup_bdi_name(sb, "%u:%u%s", MAJOR(fc->dev),
				   MINOR(fc->dev), suffix);
1049 1050 1051
	if (err)
		return err;

1052
	sb->s_bdi->ra_pages = VM_READAHEAD_PAGES;
1053 1054
	/* fuse does it's own writeback accounting */
	sb->s_bdi->capabilities = BDI_CAP_NO_ACCT_WB | BDI_CAP_STRICTLIMIT;
1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067

	/*
	 * For a single fuse filesystem use max 1% of dirty +
	 * writeback threshold.
	 *
	 * This gives about 1M of write buffer for memory maps on a
	 * machine with 1G and 10% dirty_ratio, which should be more
	 * than enough.
	 *
	 * Privileged users can raise it by writing to
	 *
	 *    /sys/class/bdi/<bdi>/max_ratio
	 */
1068
	bdi_set_max_ratio(sb->s_bdi, 1);
1069 1070 1071 1072

	return 0;
}

1073
struct fuse_dev *fuse_dev_alloc(void)
1074 1075
{
	struct fuse_dev *fud;
1076
	struct list_head *pq;
1077 1078

	fud = kzalloc(sizeof(struct fuse_dev), GFP_KERNEL);
1079 1080
	if (!fud)
		return NULL;
1081

1082 1083 1084 1085
	pq = kcalloc(FUSE_PQ_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL);
	if (!pq) {
		kfree(fud);
		return NULL;
1086 1087
	}

1088 1089 1090
	fud->pq.processing = pq;
	fuse_pqueue_init(&fud->pq);

1091 1092 1093 1094 1095 1096 1097
	return fud;
}
EXPORT_SYMBOL_GPL(fuse_dev_alloc);

/*
 * Attach @fud to @fc: store the result of fuse_conn_get(fc) in fud->fc
 * (released again with fuse_conn_put() in fuse_dev_free()) and link the
 * device onto fc->devices under fc->lock.
 */
void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc)
{
	fud->fc = fuse_conn_get(fc);
	spin_lock(&fc->lock);
	list_add_tail(&fud->entry, &fc->devices);
	spin_unlock(&fc->lock);
}
EXPORT_SYMBOL_GPL(fuse_dev_install);
1103

1104 1105 1106 1107 1108 1109 1110 1111 1112
/*
 * Convenience wrapper: fuse_dev_alloc() followed by fuse_dev_install() on
 * @fc.  Returns the installed device, or NULL if allocation failed.
 */
struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc)
{
	struct fuse_dev *fud;

	fud = fuse_dev_alloc();
	if (!fud)
		return NULL;

	fuse_dev_install(fud, fc);
	return fud;
}
EXPORT_SYMBOL_GPL(fuse_dev_alloc_install);
1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127

/*
 * Free @fud and its processing-queue table.
 *
 * If the device was installed on a connection (fud->fc is set), it is first
 * unlinked from fc->devices under fc->lock and the connection is released
 * with fuse_conn_put().
 */
void fuse_dev_free(struct fuse_dev *fud)
{
	struct fuse_conn *fc = fud->fc;

	if (fc) {
		spin_lock(&fc->lock);
		list_del(&fud->entry);
		spin_unlock(&fc->lock);

		fuse_conn_put(fc);
	}
	kfree(fud->pq.processing);
	kfree(fud);
}
EXPORT_SYMBOL_GPL(fuse_dev_free);

1133
int fuse_fill_super_common(struct super_block *sb, struct fuse_fs_context *ctx)
M
Miklos Szeredi 已提交
1134
{
1135
	struct fuse_dev *fud = NULL;
1136
	struct fuse_conn *fc = get_fuse_conn_super(sb);
M
Miklos Szeredi 已提交
1137
	struct inode *root;
1138
	struct dentry *root_dentry;
M
Miklos Szeredi 已提交
1139 1140
	int err;

1141
	err = -EINVAL;
1142
	if (sb->s_flags & SB_MANDLOCK)
1143
		goto err;
1144

1145
	sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);
A
Al Viro 已提交
1146

1147
	if (ctx->is_bdev) {
1148
#ifdef CONFIG_BLOCK
1149
		err = -EINVAL;
1150
		if (!sb_set_blocksize(sb, ctx->blksize))
1151
			goto err;
1152
#endif
M
Miklos Szeredi 已提交
1153
	} else {
1154 1155
		sb->s_blocksize = PAGE_SIZE;
		sb->s_blocksize_bits = PAGE_SHIFT;
M
Miklos Szeredi 已提交
1156
	}
1157 1158 1159

	sb->s_subtype = ctx->subtype;
	ctx->subtype = NULL;
M
Miklos Szeredi 已提交
1160 1161
	sb->s_magic = FUSE_SUPER_MAGIC;
	sb->s_op = &fuse_super_operations;
S
Seth Forshee 已提交
1162
	sb->s_xattr = fuse_xattr_handlers;
M
Miklos Szeredi 已提交
1163
	sb->s_maxbytes = MAX_LFS_FILESIZE;
1164
	sb->s_time_gran = 1;
M
Miklos Szeredi 已提交
1165
	sb->s_export_op = &fuse_export_operations;
1166 1167 1168
	sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE;
	if (sb->s_user_ns != &init_user_ns)
		sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;
M
Miklos Szeredi 已提交
1169

1170 1171 1172 1173 1174 1175 1176
	/*
	 * If we are not in the initial user namespace posix
	 * acls must be translated.
	 */
	if (sb->s_user_ns != &init_user_ns)
		sb->s_xattr = fuse_no_acl_xattr_handlers;

1177 1178 1179 1180 1181 1182
	if (ctx->fudptr) {
		err = -ENOMEM;
		fud = fuse_dev_alloc_install(fc);
		if (!fud)
			goto err;
	}
1183

1184
	fc->dev = sb->s_dev;
J
John Muir 已提交
1185
	fc->sb = sb;
1186 1187
	err = fuse_bdi_init(fc, sb);
	if (err)
1188
		goto err_dev_free;
1189

1190
	/* Handle umasking inside the fuse code */
1191
	if (sb->s_flags & SB_POSIXACL)
1192
		fc->dont_mask = 1;
1193
	sb->s_flags |= SB_POSIXACL;
1194

1195 1196 1197 1198 1199
	fc->default_permissions = ctx->default_permissions;
	fc->allow_other = ctx->allow_other;
	fc->user_id = ctx->user_id;
	fc->group_id = ctx->group_id;
	fc->max_read = max_t(unsigned, 4096, ctx->max_read);
1200
	fc->destroy = ctx->destroy;
1201 1202
	fc->no_control = ctx->no_control;
	fc->no_force_umount = ctx->no_force_umount;
1203
	fc->no_mount_options = ctx->no_mount_options;
1204

M
Miklos Szeredi 已提交
1205
	err = -ENOMEM;
1206
	root = fuse_get_root_inode(sb, ctx->rootmode);
1207
	sb->s_d_op = &fuse_root_dentry_operations;
1208 1209
	root_dentry = d_make_root(root);
	if (!root_dentry)
1210
		goto err_dev_free;
1211
	/* Root dentry doesn't have .d_revalidate */
A
Al Viro 已提交
1212
	sb->s_d_op = &fuse_dentry_operations;
1213

1214
	mutex_lock(&fuse_mutex);
1215
	err = -EINVAL;
1216
	if (ctx->fudptr && *ctx->fudptr)
1217
		goto err_unlock;
1218

1219 1220 1221 1222 1223
	err = fuse_ctl_add_conn(fc);
	if (err)
		goto err_unlock;

	list_add_tail(&fc->entry, &fuse_conn_list);
1224
	sb->s_root = root_dentry;
1225 1226
	if (ctx->fudptr)
		*ctx->fudptr = fud;
1227
	mutex_unlock(&fuse_mutex);
1228 1229 1230 1231 1232 1233
	return 0;

 err_unlock:
	mutex_unlock(&fuse_mutex);
	dput(root_dentry);
 err_dev_free:
1234 1235
	if (fud)
		fuse_dev_free(fud);
1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266
 err:
	return err;
}
EXPORT_SYMBOL_GPL(fuse_fill_super_common);

static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
{
	struct fuse_fs_context *ctx = fsc->fs_private;
	struct file *file;
	int err;
	struct fuse_conn *fc;

	err = -EINVAL;
	file = fget(ctx->fd);
	if (!file)
		goto err;

	/*
	 * Require mount to happen from the same user namespace which
	 * opened /dev/fuse to prevent potential attacks.
	 */
	if ((file->f_op != &fuse_dev_operations) ||
	    (file->f_cred->user_ns != sb->s_user_ns))
		goto err_fput;
	ctx->fudptr = &file->private_data;

	fc = kmalloc(sizeof(*fc), GFP_KERNEL);
	err = -ENOMEM;
	if (!fc)
		goto err_fput;

1267
	fuse_conn_init(fc, sb->s_user_ns, &fuse_dev_fiq_ops, NULL);
1268 1269 1270 1271 1272 1273
	fc->release = fuse_free_conn;
	sb->s_fs_info = fc;

	err = fuse_fill_super_common(sb, ctx);
	if (err)
		goto err_put_conn;
M
Miklos Szeredi 已提交
1274 1275 1276 1277 1278 1279
	/*
	 * atomic_dec_and_test() in fput() provides the necessary
	 * memory barrier for file->private_data to be visible on all
	 * CPUs after this
	 */
	fput(file);
1280
	fuse_send_init(get_fuse_conn_super(sb));
M
Miklos Szeredi 已提交
1281 1282
	return 0;

1283
 err_put_conn:
1284
	fuse_conn_put(fc);
1285
	sb->s_fs_info = NULL;
1286 1287 1288
 err_fput:
	fput(file);
 err:
M
Miklos Szeredi 已提交
1289 1290 1291
	return err;
}

1292
static int fuse_get_tree(struct fs_context *fc)
M
Miklos Szeredi 已提交
1293
{
1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310
	struct fuse_fs_context *ctx = fc->fs_private;

	if (!ctx->fd_present || !ctx->rootmode_present ||
	    !ctx->user_id_present || !ctx->group_id_present)
		return -EINVAL;

#ifdef CONFIG_BLOCK
	if (ctx->is_bdev)
		return get_tree_bdev(fc, fuse_fill_super);
#endif

	return get_tree_nodev(fc, fuse_fill_super);
}

static const struct fs_context_operations fuse_context_ops = {
	.free		= fuse_free_fc,
	.parse_param	= fuse_parse_param,
1311
	.reconfigure	= fuse_reconfigure,
1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329
	.get_tree	= fuse_get_tree,
};

/*
 * Set up the filesystem mount context.
 *
 * Allocates a zeroed fuse_fs_context with max_read set to ~0 and blksize set
 * to FUSE_DEFAULT_BLKSIZE.  For the fuseblk filesystem type (CONFIG_BLOCK
 * only) the context is additionally marked is_bdev and destroy.
 *
 * Returns 0 on success or -ENOMEM.
 */
static int fuse_init_fs_context(struct fs_context *fc)
{
	struct fuse_fs_context *ctx;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	ctx->max_read = ~0;
	ctx->blksize = FUSE_DEFAULT_BLKSIZE;

#ifdef CONFIG_BLOCK
	if (fc->fs_type == &fuseblk_fs_type) {
		ctx->is_bdev = true;
		ctx->destroy = true;
	}
#endif

	fc->fs_private = ctx;
	fc->ops = &fuse_context_ops;
	return 0;
}

1341
static void fuse_sb_destroy(struct super_block *sb)
J
John Muir 已提交
1342 1343 1344 1345
{
	struct fuse_conn *fc = get_fuse_conn_super(sb);

	if (fc) {
1346 1347
		if (fc->destroy)
			fuse_send_destroy(fc);
1348

M
Miklos Szeredi 已提交
1349
		fuse_abort_conn(fc);
1350 1351
		fuse_wait_aborted(fc);

J
John Muir 已提交
1352 1353 1354 1355
		down_write(&fc->killsb);
		fc->sb = NULL;
		up_write(&fc->killsb);
	}
1356
}
J
John Muir 已提交
1357

1358
/*
 * .kill_sb for the "fuse" filesystem type: run fuse_sb_destroy() and then
 * kill_anon_super().
 */
void fuse_kill_sb_anon(struct super_block *sb)
{
	fuse_sb_destroy(sb);
	kill_anon_super(sb);
}
EXPORT_SYMBOL_GPL(fuse_kill_sb_anon);
J
John Muir 已提交
1364

1365 1366 1367
static struct file_system_type fuse_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "fuse",
1368
	.fs_flags	= FS_HAS_SUBTYPE | FS_USERNS_MOUNT,
1369
	.init_fs_context = fuse_init_fs_context,
1370
	.parameters	= fuse_fs_parameters,
J
John Muir 已提交
1371
	.kill_sb	= fuse_kill_sb_anon,
1372
};
1373
MODULE_ALIAS_FS("fuse");
1374 1375

#ifdef CONFIG_BLOCK
J
John Muir 已提交
1376 1377
/*
 * .kill_sb for the "fuseblk" filesystem type: run fuse_sb_destroy() and then
 * kill_block_super().
 */
static void fuse_kill_sb_blk(struct super_block *sb)
{
	fuse_sb_destroy(sb);
	kill_block_super(sb);
}

1382 1383 1384
static struct file_system_type fuseblk_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "fuseblk",
1385
	.init_fs_context = fuse_init_fs_context,
1386
	.parameters	= fuse_fs_parameters,
J
John Muir 已提交
1387
	.kill_sb	= fuse_kill_sb_blk,
A
Alexey Dobriyan 已提交
1388
	.fs_flags	= FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
1389
};
1390
MODULE_ALIAS_FS("fuseblk");
1391

1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411
/* Register the "fuseblk" filesystem type; returns register_filesystem()'s result */
static inline int register_fuseblk(void)
{
	return register_filesystem(&fuseblk_fs_type);
}

/* Unregister the "fuseblk" filesystem type */
static inline void unregister_fuseblk(void)
{
	unregister_filesystem(&fuseblk_fs_type);
}
#else
/* Stub for builds without CONFIG_BLOCK: nothing to register, report success */
static inline int register_fuseblk(void)
{
	return 0;
}

/* Stub for builds without CONFIG_BLOCK: nothing to unregister */
static inline void unregister_fuseblk(void)
{
}
#endif

1412
/*
 * Constructor passed to kmem_cache_create() for fuse_inode_cachep: runs
 * inode_init_once() on the object, treating it as a struct inode.
 */
static void fuse_inode_init_once(void *foo)
{
	struct inode *inode = foo;

	inode_init_once(inode);
}

/*
 * Create the fuse inode slab cache and register the "fuseblk" and "fuse"
 * filesystem types, in that order.  On failure everything set up so far is
 * undone in reverse order.
 *
 * Returns 0 on success, -ENOMEM if the cache cannot be created, or the
 * error from the failing registration.
 */
static int __init fuse_fs_init(void)
{
	int err;

	fuse_inode_cachep = kmem_cache_create("fuse_inode",
			sizeof(struct fuse_inode), 0,
			SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT|SLAB_RECLAIM_ACCOUNT,
			fuse_inode_init_once);
	err = -ENOMEM;
	if (!fuse_inode_cachep)
		goto out;

	err = register_fuseblk();
	if (err)
		goto out2;

	err = register_filesystem(&fuse_fs_type);
	if (err)
		goto out3;

	return 0;

 out3:
	unregister_fuseblk();
 out2:
	kmem_cache_destroy(fuse_inode_cachep);
 out:
	return err;
}

static void fuse_fs_cleanup(void)
{
	unregister_filesystem(&fuse_fs_type);
1452
	unregister_fuseblk();
1453 1454 1455 1456 1457 1458

	/*
	 * Make sure all delayed rcu free inodes are flushed before we
	 * destroy cache.
	 */
	rcu_barrier();
M
Miklos Szeredi 已提交
1459 1460 1461
	kmem_cache_destroy(fuse_inode_cachep);
}

1462 1463
static struct kobject *fuse_kobj;

1464 1465 1466 1467
static int fuse_sysfs_init(void)
{
	int err;

1468
	fuse_kobj = kobject_create_and_add("fuse", fs_kobj);
1469 1470
	if (!fuse_kobj) {
		err = -ENOMEM;
1471
		goto out_err;
1472
	}
1473

1474 1475
	err = sysfs_create_mount_point(fuse_kobj, "connections");
	if (err)
1476 1477 1478 1479 1480
		goto out_fuse_unregister;

	return 0;

 out_fuse_unregister:
1481
	kobject_put(fuse_kobj);
1482 1483 1484 1485 1486 1487
 out_err:
	return err;
}

static void fuse_sysfs_cleanup(void)
{
1488
	sysfs_remove_mount_point(fuse_kobj, "connections");
1489
	kobject_put(fuse_kobj);
1490 1491
}

M
Miklos Szeredi 已提交
1492 1493 1494 1495
/*
 * Module entry point.
 *
 * Initialises, in order: the connection list, the filesystem layer
 * (fuse_fs_init), the device layer (fuse_dev_init), sysfs (fuse_sysfs_init)
 * and the control filesystem (fuse_ctl_init), then sanitises the two global
 * limit module parameters.  If a step fails, the steps already completed are
 * undone in reverse order and that step's error is returned.
 */
static int __init fuse_init(void)
{
	int res;

	pr_info("init (API version %i.%i)\n",
		FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION);

	INIT_LIST_HEAD(&fuse_conn_list);
	res = fuse_fs_init();
	if (res)
		goto err;

	res = fuse_dev_init();
	if (res)
		goto err_fs_cleanup;

	res = fuse_sysfs_init();
	if (res)
		goto err_dev_cleanup;

	res = fuse_ctl_init();
	if (res)
		goto err_sysfs_cleanup;

	sanitize_global_limit(&max_user_bgreq);
	sanitize_global_limit(&max_user_congthresh);

	return 0;

 err_sysfs_cleanup:
	fuse_sysfs_cleanup();
 err_dev_cleanup:
	fuse_dev_cleanup();
 err_fs_cleanup:
	fuse_fs_cleanup();
 err:
	return res;
}

/*
 * Module exit point: tear down the control filesystem, sysfs, the
 * filesystem layer and the device layer, in that order.
 *
 * NOTE(review): fuse_init()'s error path cleans up dev before fs, whereas
 * this function cleans up fs before dev -- confirm the difference is
 * intentional.
 */
static void __exit fuse_exit(void)
{
	pr_debug("exit\n");

	fuse_ctl_cleanup();
	fuse_sysfs_cleanup();
	fuse_fs_cleanup();
	fuse_dev_cleanup();
}

/* Register fuse_init() and fuse_exit() as the module's init and exit hooks */
module_init(fuse_init);
module_exit(fuse_exit);