/*
  FUSE: Filesystem in Userspace
  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/

#include "fuse_i.h"

#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/module.h>
17
#include <linux/moduleparam.h>
M
Miklos Szeredi 已提交
18 19
#include <linux/parser.h>
#include <linux/statfs.h>
20
#include <linux/random.h>
A
Alexey Dobriyan 已提交
21
#include <linux/sched.h>
M
Miklos Szeredi 已提交
22
#include <linux/exportfs.h>
S
Seth Forshee 已提交
23
#include <linux/posix_acl.h>
24
#include <linux/pid_namespace.h>
M
Miklos Szeredi 已提交
25 26 27 28 29

MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
MODULE_DESCRIPTION("Filesystem in Userspace");
MODULE_LICENSE("GPL");

30
static struct kmem_cache *fuse_inode_cachep;
31 32
struct list_head fuse_conn_list;
DEFINE_MUTEX(fuse_mutex);
M
Miklos Szeredi 已提交
33

34
static int set_global_limit(const char *val, const struct kernel_param *kp);
35

36
unsigned max_user_bgreq;
37 38 39 40 41 42 43
module_param_call(max_user_bgreq, set_global_limit, param_get_uint,
		  &max_user_bgreq, 0644);
__MODULE_PARM_TYPE(max_user_bgreq, "uint");
MODULE_PARM_DESC(max_user_bgreq,
 "Global limit for the maximum number of backgrounded requests an "
 "unprivileged user can set");

44
unsigned max_user_congthresh;
45 46 47 48 49 50 51
module_param_call(max_user_congthresh, set_global_limit, param_get_uint,
		  &max_user_congthresh, 0644);
__MODULE_PARM_TYPE(max_user_congthresh, "uint");
MODULE_PARM_DESC(max_user_congthresh,
 "Global limit for the maximum congestion threshold an "
 "unprivileged user can set");

M
Miklos Szeredi 已提交
52 53
/** Value reported in kstatfs.f_type by fuse_statfs() */
#define FUSE_SUPER_MAGIC 0x65735546

/** Block size used when the "blksize=" mount option is absent */
#define FUSE_DEFAULT_BLKSIZE 512

/** Maximum number of outstanding background requests */
#define FUSE_DEFAULT_MAX_BACKGROUND 12

/** Congestion starts at 75% of maximum */
#define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4)

62
struct fuse_forget_link *fuse_alloc_forget(void)
63 64 65 66
{
	return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL);
}

M
Miklos Szeredi 已提交
67 68 69 70 71
static struct inode *fuse_alloc_inode(struct super_block *sb)
{
	struct inode *inode;
	struct fuse_inode *fi;

72
	inode = kmem_cache_alloc(fuse_inode_cachep, GFP_KERNEL);
M
Miklos Szeredi 已提交
73 74 75 76
	if (!inode)
		return NULL;

	fi = get_fuse_inode(inode);
M
Miklos Szeredi 已提交
77
	fi->i_time = 0;
M
Miklos Szeredi 已提交
78
	fi->nodeid = 0;
79
	fi->nlookup = 0;
80
	fi->attr_version = 0;
M
Miklos Szeredi 已提交
81
	fi->writectr = 0;
82
	fi->orig_ino = 0;
83
	fi->state = 0;
84
	INIT_LIST_HEAD(&fi->write_files);
M
Miklos Szeredi 已提交
85 86 87
	INIT_LIST_HEAD(&fi->queued_writes);
	INIT_LIST_HEAD(&fi->writepages);
	init_waitqueue_head(&fi->page_waitq);
88
	mutex_init(&fi->mutex);
89 90
	fi->forget = fuse_alloc_forget();
	if (!fi->forget) {
91 92 93
		kmem_cache_free(fuse_inode_cachep, inode);
		return NULL;
	}
M
Miklos Szeredi 已提交
94 95 96 97

	return inode;
}

N
Nick Piggin 已提交
98 99 100 101 102 103
/* RCU callback: return the inode that embeds @head to fuse_inode_cachep. */
static void fuse_i_callback(struct rcu_head *head)
{
	kmem_cache_free(fuse_inode_cachep,
			container_of(head, struct inode, i_rcu));
}

M
Miklos Szeredi 已提交
104 105
static void fuse_destroy_inode(struct inode *inode)
{
106
	struct fuse_inode *fi = get_fuse_inode(inode);
107
	BUG_ON(!list_empty(&fi->write_files));
M
Miklos Szeredi 已提交
108
	BUG_ON(!list_empty(&fi->queued_writes));
109
	mutex_destroy(&fi->mutex);
110
	kfree(fi->forget);
N
Nick Piggin 已提交
111
	call_rcu(&inode->i_rcu, fuse_i_callback);
M
Miklos Szeredi 已提交
112 113
}

114
static void fuse_evict_inode(struct inode *inode)
M
Miklos Szeredi 已提交
115
{
116
	truncate_inode_pages_final(&inode->i_data);
117
	clear_inode(inode);
118
	if (inode->i_sb->s_flags & SB_ACTIVE) {
M
Miklos Szeredi 已提交
119
		struct fuse_conn *fc = get_fuse_conn(inode);
120
		struct fuse_inode *fi = get_fuse_inode(inode);
121 122
		fuse_queue_forget(fc, fi->forget, fi->nodeid, fi->nlookup);
		fi->forget = NULL;
123
	}
M
Miklos Szeredi 已提交
124 125
}

126 127
static int fuse_remount_fs(struct super_block *sb, int *flags, char *data)
{
128
	sync_filesystem(sb);
129
	if (*flags & SB_MANDLOCK)
130 131 132 133 134
		return -EINVAL;

	return 0;
}

135 136 137 138 139 140 141 142 143 144 145 146
/*
 * Fold a 64-bit inode number into an ino_t.  When ino_t is narrower than
 * u64 (32-bit architectures) the high bits are XOR-ed into the low bits;
 * otherwise the value is passed through unchanged.
 */
static ino_t fuse_squash_ino(u64 ino64)
{
	ino_t folded = (ino_t) ino64;

	if (sizeof(ino_t) >= sizeof(u64))
		return folded;

	return folded ^ (ino64 >> (sizeof(u64) - sizeof(ino_t)) * 8);
}

M
Miklos Szeredi 已提交
147 148
/*
 * Copy the attributes in @attr into @inode, everything except i_size.
 *
 * Bumps the connection-wide attr_version and stamps it on the inode,
 * records @attr_valid in fi->i_time and remembers the unsquashed inode
 * number and the unmasked mode in the fuse inode.
 */
void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
				   u64 attr_valid)
{
	struct fuse_inode *fi = get_fuse_inode(inode);
	struct fuse_conn *fc = get_fuse_conn(inode);

	fi->attr_version = ++fc->attr_version;
	fi->i_time = attr_valid;

	inode->i_ino = fuse_squash_ino(attr->ino);
	/* keep the file type bits, replace the permission bits */
	inode->i_mode = (attr->mode & 07777) | (inode->i_mode & S_IFMT);
	set_nlink(inode, attr->nlink);
	inode->i_uid = make_kuid(fc->user_ns, attr->uid);
	inode->i_gid = make_kgid(fc->user_ns, attr->gid);
	inode->i_blocks = attr->blocks;
	inode->i_atime.tv_sec = attr->atime;
	inode->i_atime.tv_nsec = attr->atimensec;

	/* mtime from server may be stale due to local buffered write */
	if (!(fc->writeback_cache && S_ISREG(inode->i_mode))) {
		inode->i_mtime.tv_sec = attr->mtime;
		inode->i_mtime.tv_nsec = attr->mtimensec;
		inode->i_ctime.tv_sec = attr->ctime;
		inode->i_ctime.tv_nsec = attr->ctimensec;
	}

	/* fall back to the superblock's block size if none was supplied */
	if (attr->blksize == 0)
		inode->i_blkbits = inode->i_sb->s_blocksize_bits;
	else
		inode->i_blkbits = ilog2(attr->blksize);

	/*
	 * Don't set the sticky bit in i_mode, unless we want the VFS
	 * to check permissions.  This prevents failures due to the
	 * check in may_delete().
	 */
	fi->orig_i_mode = inode->i_mode;
	if (!fc->default_permissions)
		inode->i_mode &= ~S_ISVTX;

	fi->orig_ino = attr->ino;
}

/*
 * Apply @attr to @inode, including i_size, and invalidate cached pages
 * where the new attributes show that the data changed.
 *
 * The update is skipped when @attr_version is non-zero and older than the
 * version already on the inode, or while FUSE_I_SIZE_UNSTABLE is set.
 */
void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
			    u64 attr_valid, u64 attr_version)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	bool is_wb = fc->writeback_cache;
	struct timespec64 old_mtime;
	loff_t oldsize;
	bool stale;

	spin_lock(&fc->lock);
	stale = attr_version != 0 && fi->attr_version > attr_version;
	if (stale || test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) {
		spin_unlock(&fc->lock);
		return;
	}

	old_mtime = inode->i_mtime;
	fuse_change_attributes_common(inode, attr, attr_valid);

	oldsize = inode->i_size;
	/*
	 * In case of writeback_cache enabled, the cached writes beyond EOF
	 * extend local i_size without keeping userspace server in sync. So,
	 * attr->size coming from server can be stale. We cannot trust it.
	 */
	if (!is_wb || !S_ISREG(inode->i_mode))
		i_size_write(inode, attr->size);
	spin_unlock(&fc->lock);

	/* page cache handling applies to regular files without writeback cache */
	if (is_wb || !S_ISREG(inode->i_mode))
		return;

	if (oldsize != attr->size) {
		truncate_pagecache(inode, attr->size);
	} else {
		struct timespec64 new_mtime = {
			.tv_sec = attr->mtime,
			.tv_nsec = attr->mtimensec,
		};

		/*
		 * Auto inval mode also checks and invalidates if mtime
		 * has changed.
		 */
		if (!fc->auto_inval_data ||
		    timespec64_equal(&old_mtime, &new_mtime))
			return;
	}

	invalidate_inode_pages2(inode->i_mapping);
}

static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
{
	inode->i_mode = attr->mode & S_IFMT;
M
Miklos Szeredi 已提交
246
	inode->i_size = attr->size;
M
Maxim Patlasov 已提交
247 248
	inode->i_mtime.tv_sec  = attr->mtime;
	inode->i_mtime.tv_nsec = attr->mtimensec;
M
Maxim Patlasov 已提交
249 250
	inode->i_ctime.tv_sec  = attr->ctime;
	inode->i_ctime.tv_nsec = attr->ctimensec;
251 252
	if (S_ISREG(inode->i_mode)) {
		fuse_init_common(inode);
M
Miklos Szeredi 已提交
253
		fuse_init_file_inode(inode);
254 255 256 257 258 259 260 261 262
	} else if (S_ISDIR(inode->i_mode))
		fuse_init_dir(inode);
	else if (S_ISLNK(inode->i_mode))
		fuse_init_symlink(inode);
	else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
		 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
		fuse_init_common(inode);
		init_special_inode(inode, inode->i_mode,
				   new_decode_dev(attr->rdev));
263 264
	} else
		BUG();
M
Miklos Szeredi 已提交
265 266
}

J
John Muir 已提交
267
int fuse_inode_eq(struct inode *inode, void *_nodeidp)
M
Miklos Szeredi 已提交
268
{
M
Miklos Szeredi 已提交
269
	u64 nodeid = *(u64 *) _nodeidp;
M
Miklos Szeredi 已提交
270 271 272 273 274 275 276 277
	if (get_node_id(inode) == nodeid)
		return 1;
	else
		return 0;
}

static int fuse_inode_set(struct inode *inode, void *_nodeidp)
{
M
Miklos Szeredi 已提交
278
	u64 nodeid = *(u64 *) _nodeidp;
M
Miklos Szeredi 已提交
279 280 281 282
	get_fuse_inode(inode)->nodeid = nodeid;
	return 0;
}

M
Miklos Szeredi 已提交
283
/*
 * Find or create the inode for @nodeid, bump its lookup count and apply
 * @attr to it.
 *
 * A cached inode whose file type no longer matches @attr is marked bad
 * and dropped, and the lookup is repeated.  Returns NULL when
 * iget5_locked() fails.
 */
struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
			int generation, struct fuse_attr *attr,
			u64 attr_valid, u64 attr_version)
{
	struct fuse_conn *fc = get_fuse_conn_super(sb);
	struct fuse_inode *fi;
	struct inode *inode;

	for (;;) {
		inode = iget5_locked(sb, nodeid, fuse_inode_eq,
				     fuse_inode_set, &nodeid);
		if (!inode)
			return NULL;

		if (inode->i_state & I_NEW) {
			inode->i_flags |= S_NOATIME;
			if (!fc->writeback_cache || !S_ISREG(attr->mode))
				inode->i_flags |= S_NOCMTIME;
			inode->i_generation = generation;
			fuse_init_inode(inode, attr);
			unlock_new_inode(inode);
			break;
		}

		if (!((inode->i_mode ^ attr->mode) & S_IFMT))
			break;

		/* Inode has changed type, any I/O on the old should fail */
		make_bad_inode(inode);
		iput(inode);
	}

	fi = get_fuse_inode(inode);
	spin_lock(&fc->lock);
	fi->nlookup++;
	spin_unlock(&fc->lock);

	fuse_change_attributes(inode, attr, attr_valid, attr_version);

	return inode;
}

J
John Muir 已提交
319 320 321 322 323 324 325 326 327 328 329 330
/*
 * Invalidate cached attributes, cached ACLs and, for a non-negative
 * @offset, a range of cached pages of the inode with id @nodeid.
 *
 * A @len of zero or less invalidates from @offset to the end of the
 * mapping.  Returns -ENOENT if the inode is not in the cache, 0 otherwise.
 */
int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
			     loff_t offset, loff_t len)
{
	struct inode *inode = ilookup5(sb, nodeid, fuse_inode_eq, &nodeid);

	if (!inode)
		return -ENOENT;

	fuse_invalidate_attr(inode);
	forget_all_cached_acls(inode);

	if (offset >= 0) {
		pgoff_t pg_start = offset >> PAGE_SHIFT;
		pgoff_t pg_end;

		if (len > 0)
			pg_end = (offset + len - 1) >> PAGE_SHIFT;
		else
			pg_end = -1;

		invalidate_inode_pages2_range(inode->i_mapping,
					      pg_start, pg_end);
	}

	iput(inode);
	return 0;
}

345
bool fuse_lock_inode(struct inode *inode)
346
{
347 348 349
	bool locked = false;

	if (!get_fuse_conn(inode)->parallel_dirops) {
350
		mutex_lock(&get_fuse_inode(inode)->mutex);
351 352 353 354
		locked = true;
	}

	return locked;
355 356
}

357
/* Release the per-inode mutex if @locked says fuse_lock_inode() took it. */
void fuse_unlock_inode(struct inode *inode, bool locked)
{
	if (!locked)
		return;

	mutex_unlock(&get_fuse_inode(inode)->mutex);
}

363
static void fuse_umount_begin(struct super_block *sb)
364
{
365
	fuse_abort_conn(get_fuse_conn_super(sb), false);
366 367
}

368 369 370 371 372 373
static void fuse_send_destroy(struct fuse_conn *fc)
{
	struct fuse_req *req = fc->destroy_req;
	if (req && fc->conn_init) {
		fc->destroy_req = NULL;
		req->in.h.opcode = FUSE_DESTROY;
M
Miklos Szeredi 已提交
374 375
		__set_bit(FR_FORCE, &req->flags);
		__clear_bit(FR_BACKGROUND, &req->flags);
376
		fuse_request_send(fc, req);
377 378 379 380
		fuse_put_request(fc, req);
	}
}

381 382 383 384
/*
 * ->put_super: with fuse_mutex held, unlink the connection from its list
 * and remove its control entries; then drop a reference on the
 * connection.
 */
static void fuse_put_super(struct super_block *sb)
{
	struct fuse_conn *conn = get_fuse_conn_super(sb);

	mutex_lock(&fuse_mutex);
	list_del(&conn->entry);
	fuse_ctl_remove_conn(conn);
	mutex_unlock(&fuse_mutex);

	fuse_conn_put(conn);
}

393 394 395 396
/*
 * Fill @stbuf from the statfs reply @attr.  f_type is always
 * FUSE_SUPER_MAGIC; f_fsid is not written here.
 */
static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr)
{
	stbuf->f_type = FUSE_SUPER_MAGIC;

	/* sizes */
	stbuf->f_bsize = attr->bsize;
	stbuf->f_frsize = attr->frsize;
	stbuf->f_namelen = attr->namelen;

	/* block counts */
	stbuf->f_blocks = attr->blocks;
	stbuf->f_bfree = attr->bfree;
	stbuf->f_bavail = attr->bavail;

	/* inode counts */
	stbuf->f_files = attr->files;
	stbuf->f_ffree = attr->ffree;
	/* fsid is left zero */
}

407
static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
408
{
409
	struct super_block *sb = dentry->d_sb;
410
	struct fuse_conn *fc = get_fuse_conn_super(sb);
411
	FUSE_ARGS(args);
412 413 414
	struct fuse_statfs_out outarg;
	int err;

415
	if (!fuse_allow_current_process(fc)) {
M
Miklos Szeredi 已提交
416 417 418 419
		buf->f_type = FUSE_SUPER_MAGIC;
		return 0;
	}

420
	memset(&outarg, 0, sizeof(outarg));
421 422
	args.in.numargs = 0;
	args.in.h.opcode = FUSE_STATFS;
423
	args.in.h.nodeid = get_node_id(d_inode(dentry));
424
	args.out.numargs = 1;
425
	args.out.args[0].size = sizeof(outarg);
426 427
	args.out.args[0].value = &outarg;
	err = fuse_simple_request(fc, &args);
428 429 430 431 432
	if (!err)
		convert_fuse_statfs(buf, &outarg.st);
	return err;
}

M
Miklos Szeredi 已提交
433 434 435 436
/* Token ids for the mount options listed in the tokens table. */
enum {
	OPT_FD,
	OPT_ROOTMODE,
	OPT_USER_ID,
	OPT_GROUP_ID,
	OPT_DEFAULT_PERMISSIONS,
	OPT_ALLOW_OTHER,
	OPT_MAX_READ,
	OPT_BLKSIZE,
	OPT_ERR		/* terminator of the tokens table */
};

445
static const match_table_t tokens = {
M
Miklos Szeredi 已提交
446 447 448
	{OPT_FD,			"fd=%u"},
	{OPT_ROOTMODE,			"rootmode=%o"},
	{OPT_USER_ID,			"user_id=%u"},
449
	{OPT_GROUP_ID,			"group_id=%u"},
M
Miklos Szeredi 已提交
450 451
	{OPT_DEFAULT_PERMISSIONS,	"default_permissions"},
	{OPT_ALLOW_OTHER,		"allow_other"},
452
	{OPT_MAX_READ,			"max_read=%u"},
M
Miklos Szeredi 已提交
453
	{OPT_BLKSIZE,			"blksize=%u"},
M
Miklos Szeredi 已提交
454 455 456
	{OPT_ERR,			NULL}
};

457 458 459 460 461 462 463 464 465 466 467
/*
 * Parse the matched substring @s as a base-10 unsigned int into @res.
 * Returns -ENOMEM if the temporary copy cannot be allocated, otherwise
 * the result of kstrtouint().
 */
static int fuse_match_uint(substring_t *s, unsigned int *res)
{
	char *copy = match_strdup(s);
	int err;

	if (!copy)
		return -ENOMEM;

	err = kstrtouint(copy, 10, res);
	kfree(copy);
	return err;
}

468
int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev,
469
			  struct user_namespace *user_ns)
M
Miklos Szeredi 已提交
470 471 472
{
	char *p;
	memset(d, 0, sizeof(struct fuse_mount_data));
473
	d->max_read = ~0;
M
Miklos Szeredi 已提交
474
	d->blksize = FUSE_DEFAULT_BLKSIZE;
M
Miklos Szeredi 已提交
475 476 477 478

	while ((p = strsep(&opt, ",")) != NULL) {
		int token;
		int value;
479
		unsigned uv;
M
Miklos Szeredi 已提交
480 481 482 483 484 485 486 487 488 489
		substring_t args[MAX_OPT_ARGS];
		if (!*p)
			continue;

		token = match_token(p, tokens, args);
		switch (token) {
		case OPT_FD:
			if (match_int(&args[0], &value))
				return 0;
			d->fd = value;
490
			d->fd_present = 1;
M
Miklos Szeredi 已提交
491 492 493 494 495
			break;

		case OPT_ROOTMODE:
			if (match_octal(&args[0], &value))
				return 0;
496 497
			if (!fuse_valid_type(value))
				return 0;
M
Miklos Szeredi 已提交
498
			d->rootmode = value;
499
			d->rootmode_present = 1;
M
Miklos Szeredi 已提交
500 501 502
			break;

		case OPT_USER_ID:
503
			if (fuse_match_uint(&args[0], &uv))
M
Miklos Szeredi 已提交
504
				return 0;
505
			d->user_id = make_kuid(user_ns, uv);
506 507
			if (!uid_valid(d->user_id))
				return 0;
508
			d->user_id_present = 1;
M
Miklos Szeredi 已提交
509 510
			break;

511
		case OPT_GROUP_ID:
512
			if (fuse_match_uint(&args[0], &uv))
513
				return 0;
514
			d->group_id = make_kgid(user_ns, uv);
515 516
			if (!gid_valid(d->group_id))
				return 0;
517
			d->group_id_present = 1;
518 519
			break;

M
Miklos Szeredi 已提交
520
		case OPT_DEFAULT_PERMISSIONS:
M
Miklos Szeredi 已提交
521
			d->default_permissions = 1;
M
Miklos Szeredi 已提交
522 523 524
			break;

		case OPT_ALLOW_OTHER:
M
Miklos Szeredi 已提交
525
			d->allow_other = 1;
M
Miklos Szeredi 已提交
526 527
			break;

528 529 530 531 532 533
		case OPT_MAX_READ:
			if (match_int(&args[0], &value))
				return 0;
			d->max_read = value;
			break;

M
Miklos Szeredi 已提交
534 535 536 537 538 539
		case OPT_BLKSIZE:
			if (!is_bdev || match_int(&args[0], &value))
				return 0;
			d->blksize = value;
			break;

M
Miklos Szeredi 已提交
540 541 542 543
		default:
			return 0;
		}
	}
544

545 546
	if (!d->rootmode_present || !d->user_id_present ||
	    !d->group_id_present)
M
Miklos Szeredi 已提交
547 548 549 550
		return 0;

	return 1;
}
551
EXPORT_SYMBOL_GPL(parse_fuse_opt);
M
Miklos Szeredi 已提交
552

553
static int fuse_show_options(struct seq_file *m, struct dentry *root)
M
Miklos Szeredi 已提交
554
{
555 556
	struct super_block *sb = root->d_sb;
	struct fuse_conn *fc = get_fuse_conn_super(sb);
M
Miklos Szeredi 已提交
557

558 559
	seq_printf(m, ",user_id=%u", from_kuid_munged(fc->user_ns, fc->user_id));
	seq_printf(m, ",group_id=%u", from_kgid_munged(fc->user_ns, fc->group_id));
M
Miklos Szeredi 已提交
560
	if (fc->default_permissions)
M
Miklos Szeredi 已提交
561
		seq_puts(m, ",default_permissions");
M
Miklos Szeredi 已提交
562
	if (fc->allow_other)
M
Miklos Szeredi 已提交
563
		seq_puts(m, ",allow_other");
564 565
	if (fc->max_read != ~0)
		seq_printf(m, ",max_read=%u", fc->max_read);
566 567
	if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE)
		seq_printf(m, ",blksize=%lu", sb->s_blocksize);
M
Miklos Szeredi 已提交
568 569 570
	return 0;
}

M
Miklos Szeredi 已提交
571 572 573
/*
 * Zero @fiq and set up its lock, wait queue and lists.  The forget list
 * starts empty (tail points at the head) and the queue is marked
 * connected.
 */
static void fuse_iqueue_init(struct fuse_iqueue *fiq)
{
	memset(fiq, 0, sizeof(*fiq));

	spin_lock_init(&fiq->lock);
	init_waitqueue_head(&fiq->waitq);

	INIT_LIST_HEAD(&fiq->pending);
	INIT_LIST_HEAD(&fiq->interrupts);
	fiq->forget_list_tail = &fiq->forget_list_head;

	fiq->connected = 1;
}

582 583 584
/* Zero @fpq, set up its lock and lists, and mark it connected. */
static void fuse_pqueue_init(struct fuse_pqueue *fpq)
{
	memset(fpq, 0, sizeof(*fpq));

	spin_lock_init(&fpq->lock);

	INIT_LIST_HEAD(&fpq->processing);
	INIT_LIST_HEAD(&fpq->io);

	fpq->connected = 1;
}

591
void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns)
M
Miklos Szeredi 已提交
592
{
593 594
	memset(fc, 0, sizeof(*fc));
	spin_lock_init(&fc->lock);
J
John Muir 已提交
595
	init_rwsem(&fc->killsb);
596
	refcount_set(&fc->count, 1);
597
	atomic_set(&fc->dev_count, 1);
598 599
	init_waitqueue_head(&fc->blocked_waitq);
	init_waitqueue_head(&fc->reserved_req_waitq);
M
Miklos Szeredi 已提交
600
	fuse_iqueue_init(&fc->iq);
601 602
	INIT_LIST_HEAD(&fc->bg_queue);
	INIT_LIST_HEAD(&fc->entry);
603
	INIT_LIST_HEAD(&fc->devices);
604
	atomic_set(&fc->num_waiting, 0);
605 606
	fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND;
	fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD;
607 608
	fc->khctr = 0;
	fc->polled_files = RB_ROOT;
609
	fc->blocked = 0;
M
Maxim Patlasov 已提交
610
	fc->initialized = 0;
611
	fc->connected = 1;
612 613
	fc->attr_version = 1;
	get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
614
	fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
615
	fc->user_ns = get_user_ns(user_ns);
M
Miklos Szeredi 已提交
616
}
617
EXPORT_SYMBOL_GPL(fuse_conn_init);
M
Miklos Szeredi 已提交
618

619 620
void fuse_conn_put(struct fuse_conn *fc)
{
621
	if (refcount_dec_and_test(&fc->count)) {
622 623
		if (fc->destroy_req)
			fuse_request_free(fc->destroy_req);
624
		put_pid_ns(fc->pid_ns);
625
		put_user_ns(fc->user_ns);
T
Tejun Heo 已提交
626
		fc->release(fc);
627
	}
628
}
629
EXPORT_SYMBOL_GPL(fuse_conn_put);
630 631 632

struct fuse_conn *fuse_conn_get(struct fuse_conn *fc)
{
633
	refcount_inc(&fc->count);
634 635
	return fc;
}
636
EXPORT_SYMBOL_GPL(fuse_conn_get);
637

638
static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode)
M
Miklos Szeredi 已提交
639 640 641 642 643 644
{
	struct fuse_attr attr;
	memset(&attr, 0, sizeof(attr));

	attr.mode = mode;
	attr.ino = FUSE_ROOT_ID;
645
	attr.nlink = 1;
646
	return fuse_iget(sb, 1, 0, &attr, 0, 0);
M
Miklos Szeredi 已提交
647 648
}

M
Miklos Szeredi 已提交
649
struct fuse_inode_handle {
M
Miklos Szeredi 已提交
650 651 652 653 654 655 656
	u64 nodeid;
	u32 generation;
};

static struct dentry *fuse_get_dentry(struct super_block *sb,
				      struct fuse_inode_handle *handle)
{
657
	struct fuse_conn *fc = get_fuse_conn_super(sb);
M
Miklos Szeredi 已提交
658 659 660 661 662 663 664 665
	struct inode *inode;
	struct dentry *entry;
	int err = -ESTALE;

	if (handle->nodeid == 0)
		goto out_err;

	inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid);
666 667
	if (!inode) {
		struct fuse_entry_out outarg;
A
Al Viro 已提交
668
		const struct qstr name = QSTR_INIT(".", 1);
669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684

		if (!fc->export_support)
			goto out_err;

		err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg,
				       &inode);
		if (err && err != -ENOENT)
			goto out_err;
		if (err || !inode) {
			err = -ESTALE;
			goto out_err;
		}
		err = -EIO;
		if (get_node_id(inode) != handle->nodeid)
			goto out_iput;
	}
M
Miklos Szeredi 已提交
685 686 687 688
	err = -ESTALE;
	if (inode->i_generation != handle->generation)
		goto out_iput;

689
	entry = d_obtain_alias(inode);
A
Al Viro 已提交
690
	if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID)
M
Miklos Szeredi 已提交
691 692 693 694 695 696 697 698 699 700
		fuse_invalidate_entry_cache(entry);

	return entry;

 out_iput:
	iput(inode);
 out_err:
	return ERR_PTR(err);
}

A
Al Viro 已提交
701 702
/*
 * ->encode_fh: write the file handle for @inode into @fh.
 *
 * Layout in 32-bit words: node id high, node id low, generation; when
 * @parent is given, the same three words for the parent follow.
 *
 * *max_len is the buffer size in words on entry and the needed length on
 * return.  Returns FILEID_INVALID if the buffer is too small, else 0x82
 * with a parent and 0x81 without.
 */
static int fuse_encode_fh(struct inode *inode, u32 *fh, int *max_len,
			   struct inode *parent)
{
	const int words = parent ? 6 : 3;
	u64 id;

	if (*max_len < words) {
		*max_len = words;
		return  FILEID_INVALID;
	}

	id = get_fuse_inode(inode)->nodeid;
	fh[0] = (u32)(id >> 32);
	fh[1] = (u32)(id & 0xffffffff);
	fh[2] = inode->i_generation;

	if (parent) {
		id = get_fuse_inode(parent)->nodeid;
		fh[3] = (u32)(id >> 32);
		fh[4] = (u32)(id & 0xffffffff);
		fh[5] = parent->i_generation;
	}

	*max_len = words;
	return parent ? 0x82 : 0x81;
}

static struct dentry *fuse_fh_to_dentry(struct super_block *sb,
		struct fid *fid, int fh_len, int fh_type)
{
	struct fuse_inode_handle handle;

	if ((fh_type != 0x81 && fh_type != 0x82) || fh_len < 3)
		return NULL;

	handle.nodeid = (u64) fid->raw[0] << 32;
	handle.nodeid |= (u64) fid->raw[1];
	handle.generation = fid->raw[2];
	return fuse_get_dentry(sb, &handle);
}

static struct dentry *fuse_fh_to_parent(struct super_block *sb,
		struct fid *fid, int fh_len, int fh_type)
{
	struct fuse_inode_handle parent;

	if (fh_type != 0x82 || fh_len < 6)
		return NULL;

	parent.nodeid = (u64) fid->raw[3] << 32;
	parent.nodeid |= (u64) fid->raw[4];
	parent.generation = fid->raw[5];
	return fuse_get_dentry(sb, &parent);
}

761 762
static struct dentry *fuse_get_parent(struct dentry *child)
{
763
	struct inode *child_inode = d_inode(child);
764 765 766 767
	struct fuse_conn *fc = get_fuse_conn(child_inode);
	struct inode *inode;
	struct dentry *parent;
	struct fuse_entry_out outarg;
A
Al Viro 已提交
768
	const struct qstr name = QSTR_INIT("..", 2);
769 770 771 772 773 774 775
	int err;

	if (!fc->export_support)
		return ERR_PTR(-ESTALE);

	err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode),
			       &name, &outarg, &inode);
776 777 778
	if (err) {
		if (err == -ENOENT)
			return ERR_PTR(-ESTALE);
779 780
		return ERR_PTR(err);
	}
781 782

	parent = d_obtain_alias(inode);
A
Al Viro 已提交
783
	if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID)
784 785 786 787
		fuse_invalidate_entry_cache(parent);

	return parent;
}
M
Miklos Szeredi 已提交
788 789 790 791 792

static const struct export_operations fuse_export_operations = {
	.fh_to_dentry	= fuse_fh_to_dentry,
	.fh_to_parent	= fuse_fh_to_parent,
	.encode_fh	= fuse_encode_fh,
793
	.get_parent	= fuse_get_parent,
M
Miklos Szeredi 已提交
794 795
};

796
static const struct super_operations fuse_super_operations = {
M
Miklos Szeredi 已提交
797 798
	.alloc_inode    = fuse_alloc_inode,
	.destroy_inode  = fuse_destroy_inode,
799
	.evict_inode	= fuse_evict_inode,
M
Miklos Szeredi 已提交
800
	.write_inode	= fuse_write_inode,
M
Miklos Szeredi 已提交
801
	.drop_inode	= generic_delete_inode,
802
	.remount_fs	= fuse_remount_fs,
M
Miklos Szeredi 已提交
803
	.put_super	= fuse_put_super,
804
	.umount_begin	= fuse_umount_begin,
805
	.statfs		= fuse_statfs,
M
Miklos Szeredi 已提交
806 807 808
	.show_options	= fuse_show_options,
};

809 810 811
static void sanitize_global_limit(unsigned *limit)
{
	if (*limit == 0)
812
		*limit = ((totalram_pages << PAGE_SHIFT) >> 13) /
813 814 815 816 817 818
			 sizeof(struct fuse_req);

	if (*limit >= 1 << 16)
		*limit = (1 << 16) - 1;
}

819
static int set_global_limit(const char *val, const struct kernel_param *kp)
820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856
{
	int rv;

	rv = param_set_uint(val, kp);
	if (rv)
		return rv;

	sanitize_global_limit((unsigned *)kp->arg);

	return 0;
}

/*
 * Take max_background and congestion_threshold from the INIT reply.
 * Replies with a minor version below 13 are ignored.  A zero field
 * leaves the current value alone.  Without CAP_SYS_ADMIN the values are
 * clamped to the sanitised global limits.
 */
static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg)
{
	int privileged = capable(CAP_SYS_ADMIN);

	if (arg->minor < 13)
		return;

	sanitize_global_limit(&max_user_bgreq);
	sanitize_global_limit(&max_user_congthresh);

	if (arg->max_background) {
		fc->max_background = arg->max_background;
		if (fc->max_background > max_user_bgreq && !privileged)
			fc->max_background = max_user_bgreq;
	}

	if (arg->congestion_threshold) {
		fc->congestion_threshold = arg->congestion_threshold;
		if (fc->congestion_threshold > max_user_congthresh &&
		    !privileged)
			fc->congestion_threshold = max_user_congthresh;
	}
}

857 858 859 860 861 862 863
static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
{
	struct fuse_init_out *arg = &req->misc.init_out;

	if (req->out.h.error || arg->major != FUSE_KERNEL_VERSION)
		fc->conn_error = 1;
	else {
864 865
		unsigned long ra_pages;

866 867
		process_init_limits(fc, arg);

868
		if (arg->minor >= 6) {
869
			ra_pages = arg->max_readahead / PAGE_SIZE;
870 871
			if (arg->flags & FUSE_ASYNC_READ)
				fc->async_read = 1;
872 873
			if (!(arg->flags & FUSE_POSIX_LOCKS))
				fc->no_lock = 1;
M
Miklos Szeredi 已提交
874 875 876
			if (arg->minor >= 17) {
				if (!(arg->flags & FUSE_FLOCK_LOCKS))
					fc->no_flock = 1;
M
Miklos Szeredi 已提交
877 878 879
			} else {
				if (!(arg->flags & FUSE_POSIX_LOCKS))
					fc->no_flock = 1;
M
Miklos Szeredi 已提交
880
			}
881 882
			if (arg->flags & FUSE_ATOMIC_O_TRUNC)
				fc->atomic_o_trunc = 1;
883 884 885 886 887
			if (arg->minor >= 9) {
				/* LOOKUP has dependency on proto version */
				if (arg->flags & FUSE_EXPORT_SUPPORT)
					fc->export_support = 1;
			}
888 889
			if (arg->flags & FUSE_BIG_WRITES)
				fc->big_writes = 1;
890 891
			if (arg->flags & FUSE_DONT_MASK)
				fc->dont_mask = 1;
892 893
			if (arg->flags & FUSE_AUTO_INVAL_DATA)
				fc->auto_inval_data = 1;
894
			if (arg->flags & FUSE_DO_READDIRPLUS) {
895
				fc->do_readdirplus = 1;
896 897 898
				if (arg->flags & FUSE_READDIRPLUS_AUTO)
					fc->readdirplus_auto = 1;
			}
899 900
			if (arg->flags & FUSE_ASYNC_DIO)
				fc->async_dio = 1;
P
Pavel Emelyanov 已提交
901 902
			if (arg->flags & FUSE_WRITEBACK_CACHE)
				fc->writeback_cache = 1;
903 904
			if (arg->flags & FUSE_PARALLEL_DIROPS)
				fc->parallel_dirops = 1;
905 906
			if (arg->flags & FUSE_HANDLE_KILLPRIV)
				fc->handle_killpriv = 1;
907 908
			if (arg->time_gran && arg->time_gran <= 1000000000)
				fc->sb->s_time_gran = arg->time_gran;
S
Seth Forshee 已提交
909
			if ((arg->flags & FUSE_POSIX_ACL)) {
M
Miklos Szeredi 已提交
910
				fc->default_permissions = 1;
S
Seth Forshee 已提交
911 912 913
				fc->posix_acl = 1;
				fc->sb->s_xattr = fuse_acl_xattr_handlers;
			}
914 915
			if (arg->flags & FUSE_ABORT_ERROR)
				fc->abort_err = 1;
916
		} else {
917
			ra_pages = fc->max_read / PAGE_SIZE;
918
			fc->no_lock = 1;
M
Miklos Szeredi 已提交
919
			fc->no_flock = 1;
920
		}
921

922 923
		fc->sb->s_bdi->ra_pages =
				min(fc->sb->s_bdi->ra_pages, ra_pages);
924 925
		fc->minor = arg->minor;
		fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
926
		fc->max_write = max_t(unsigned, 4096, fc->max_write);
927
		fc->conn_init = 1;
928
	}
929
	fuse_set_initialized(fc);
930
	wake_up_all(&fc->blocked_waitq);
931 932
}

933
static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
934 935
{
	struct fuse_init_in *arg = &req->misc.init_in;
M
Miklos Szeredi 已提交
936

937 938
	arg->major = FUSE_KERNEL_VERSION;
	arg->minor = FUSE_KERNEL_MINOR_VERSION;
939
	arg->max_readahead = fc->sb->s_bdi->ra_pages * PAGE_SIZE;
940
	arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
M
Miklos Szeredi 已提交
941
		FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
M
Miklos Szeredi 已提交
942
		FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
943
		FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
P
Pavel Emelyanov 已提交
944
		FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
945
		FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
946 947
		FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |
		FUSE_ABORT_ERROR;
948 949 950 951 952
	req->in.h.opcode = FUSE_INIT;
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(*arg);
	req->in.args[0].value = arg;
	req->out.numargs = 1;
D
Daniel Mack 已提交
953
	/* Variable length argument used for backward compatibility
954 955 956 957 958 959
	   with interface version < 7.5.  Rest of init_out is zeroed
	   by do_get_request(), so a short reply is not a problem */
	req->out.argvar = 1;
	req->out.args[0].size = sizeof(struct fuse_init_out);
	req->out.args[0].value = &req->misc.init_out;
	req->end = process_init_reply;
960
	fuse_request_send_background(fc, req);
961 962
}

T
Tejun Heo 已提交
963 964
static void fuse_free_conn(struct fuse_conn *fc)
{
965
	WARN_ON(!list_empty(&fc->devices));
A
Al Viro 已提交
966
	kfree_rcu(fc, rcu);
T
Tejun Heo 已提交
967 968
}

969 970 971
static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
{
	int err;
972
	char *suffix = "";
973

974
	if (sb->s_bdev) {
975
		suffix = "-fuseblk";
976 977 978 979 980 981 982
		/*
		 * sb->s_bdi points to blkdev's bdi however we want to redirect
		 * it to our private bdi...
		 */
		bdi_put(sb->s_bdi);
		sb->s_bdi = &noop_backing_dev_info;
	}
983 984
	err = super_setup_bdi_name(sb, "%u:%u%s", MAJOR(fc->dev),
				   MINOR(fc->dev), suffix);
985 986 987
	if (err)
		return err;

988 989 990
	sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_SIZE;
	/* fuse does it's own writeback accounting */
	sb->s_bdi->capabilities = BDI_CAP_NO_ACCT_WB | BDI_CAP_STRICTLIMIT;
991 992 993 994 995 996 997 998 999 1000 1001 1002 1003

	/*
	 * For a single fuse filesystem use max 1% of dirty +
	 * writeback threshold.
	 *
	 * This gives about 1M of write buffer for memory maps on a
	 * machine with 1G and 10% dirty_ratio, which should be more
	 * than enough.
	 *
	 * Privileged users can raise it by writing to
	 *
	 *    /sys/class/bdi/<bdi>/max_ratio
	 */
1004
	bdi_set_max_ratio(sb->s_bdi, 1);
1005 1006 1007 1008

	return 0;
}

1009 1010 1011 1012 1013 1014 1015
struct fuse_dev *fuse_dev_alloc(struct fuse_conn *fc)
{
	struct fuse_dev *fud;

	fud = kzalloc(sizeof(struct fuse_dev), GFP_KERNEL);
	if (fud) {
		fud->fc = fuse_conn_get(fc);
1016
		fuse_pqueue_init(&fud->pq);
1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041

		spin_lock(&fc->lock);
		list_add_tail(&fud->entry, &fc->devices);
		spin_unlock(&fc->lock);
	}

	return fud;
}
EXPORT_SYMBOL_GPL(fuse_dev_alloc);

/*
 * Free @fud.  When it is attached to a connection, first unlink it from
 * that connection's device list (under the connection lock) and drop
 * the reference the device was holding.
 */
void fuse_dev_free(struct fuse_dev *fud)
{
	struct fuse_conn *conn = fud->fc;

	if (conn) {
		spin_lock(&conn->lock);
		list_del(&fud->entry);
		spin_unlock(&conn->lock);

		fuse_conn_put(conn);
	}

	kfree(fud);
}
EXPORT_SYMBOL_GPL(fuse_dev_free);

1042 1043 1044
int fuse_fill_super_common(struct super_block *sb,
			   struct fuse_mount_data *mount_data,
			   void **fudptr)
M
Miklos Szeredi 已提交
1045
{
1046
	struct fuse_dev *fud;
M
Miklos Szeredi 已提交
1047 1048
	struct fuse_conn *fc;
	struct inode *root;
1049
	struct dentry *root_dentry;
M
Miklos Szeredi 已提交
1050
	int err;
M
Miklos Szeredi 已提交
1051
	int is_bdev = sb->s_bdev != NULL;
M
Miklos Szeredi 已提交
1052

1053
	err = -EINVAL;
1054
	if (sb->s_flags & SB_MANDLOCK)
1055
		goto err;
1056

1057
	sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);
A
Al Viro 已提交
1058

M
Miklos Szeredi 已提交
1059
	if (is_bdev) {
1060
#ifdef CONFIG_BLOCK
1061
		err = -EINVAL;
1062
		if (!sb_set_blocksize(sb, mount_data->blksize))
1063
			goto err;
1064
#endif
M
Miklos Szeredi 已提交
1065
	} else {
1066 1067
		sb->s_blocksize = PAGE_SIZE;
		sb->s_blocksize_bits = PAGE_SHIFT;
M
Miklos Szeredi 已提交
1068
	}
M
Miklos Szeredi 已提交
1069 1070
	sb->s_magic = FUSE_SUPER_MAGIC;
	sb->s_op = &fuse_super_operations;
S
Seth Forshee 已提交
1071
	sb->s_xattr = fuse_xattr_handlers;
M
Miklos Szeredi 已提交
1072
	sb->s_maxbytes = MAX_LFS_FILESIZE;
1073
	sb->s_time_gran = 1;
M
Miklos Szeredi 已提交
1074
	sb->s_export_op = &fuse_export_operations;
1075 1076 1077
	sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE;
	if (sb->s_user_ns != &init_user_ns)
		sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;
M
Miklos Szeredi 已提交
1078

1079 1080 1081 1082 1083 1084 1085
	/*
	 * If we are not in the initial user namespace posix
	 * acls must be translated.
	 */
	if (sb->s_user_ns != &init_user_ns)
		sb->s_xattr = fuse_no_acl_xattr_handlers;

1086
	fc = kmalloc(sizeof(*fc), GFP_KERNEL);
1087 1088
	err = -ENOMEM;
	if (!fc)
1089
		goto err;
M
Miklos Szeredi 已提交
1090

1091
	fuse_conn_init(fc, sb->s_user_ns);
1092
	fc->release = fuse_free_conn;
1093

1094 1095 1096 1097
	fud = fuse_dev_alloc(fc);
	if (!fud)
		goto err_put_conn;

1098
	fc->dev = sb->s_dev;
J
John Muir 已提交
1099
	fc->sb = sb;
1100 1101
	err = fuse_bdi_init(fc, sb);
	if (err)
1102
		goto err_dev_free;
1103

1104
	/* Handle umasking inside the fuse code */
1105
	if (sb->s_flags & SB_POSIXACL)
1106
		fc->dont_mask = 1;
1107
	sb->s_flags |= SB_POSIXACL;
1108

1109 1110 1111 1112 1113
	fc->default_permissions = mount_data->default_permissions;
	fc->allow_other = mount_data->allow_other;
	fc->user_id = mount_data->user_id;
	fc->group_id = mount_data->group_id;
	fc->max_read = max_t(unsigned, 4096, mount_data->max_read);
M
Miklos Szeredi 已提交
1114

1115 1116 1117
	/* Used by get_root_inode() */
	sb->s_fs_info = fc;

M
Miklos Szeredi 已提交
1118
	err = -ENOMEM;
1119
	root = fuse_get_root_inode(sb, mount_data->rootmode);
1120
	sb->s_d_op = &fuse_root_dentry_operations;
1121 1122
	root_dentry = d_make_root(root);
	if (!root_dentry)
1123
		goto err_dev_free;
1124
	/* Root dentry doesn't have .d_revalidate */
A
Al Viro 已提交
1125
	sb->s_d_op = &fuse_dentry_operations;
1126

1127
	if (is_bdev) {
1128
		fc->destroy_req = fuse_request_alloc(0);
1129
		if (!fc->destroy_req)
1130
			goto err_put_root;
1131 1132
	}

1133
	mutex_lock(&fuse_mutex);
1134
	err = -EINVAL;
1135
	if (*fudptr)
1136
		goto err_unlock;
1137

1138 1139 1140 1141 1142
	err = fuse_ctl_add_conn(fc);
	if (err)
		goto err_unlock;

	list_add_tail(&fc->entry, &fuse_conn_list);
1143
	sb->s_root = root_dentry;
1144
	*fudptr = fud;
1145
	mutex_unlock(&fuse_mutex);
M
Miklos Szeredi 已提交
1146 1147
	return 0;

1148 1149
 err_unlock:
	mutex_unlock(&fuse_mutex);
1150 1151
 err_put_root:
	dput(root_dentry);
1152 1153
 err_dev_free:
	fuse_dev_free(fud);
1154
 err_put_conn:
1155
	fuse_conn_put(fc);
1156
	sb->s_fs_info = NULL;
1157
 err:
M
Miklos Szeredi 已提交
1158 1159
	return err;
}
1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211
EXPORT_SYMBOL_GPL(fuse_fill_super_common);

static int fuse_fill_super(struct super_block *sb, void *data, int silent)
{
	struct fuse_mount_data d;
	struct file *file;
	int is_bdev = sb->s_bdev != NULL;
	int err;
	struct fuse_req *init_req;

	err = -EINVAL;
	if (!parse_fuse_opt(data, &d, is_bdev, sb->s_user_ns))
		goto err;
	if (!d.fd_present)
		goto err;

	file = fget(d.fd);
	if (!file)
		goto err;

	/*
	 * Require mount to happen from the same user namespace which
	 * opened /dev/fuse to prevent potential attacks.
	 */
	if ((file->f_op != &fuse_dev_operations) ||
	    (file->f_cred->user_ns != sb->s_user_ns))
		goto err_fput;

	init_req = fuse_request_alloc(0);
	if (!init_req)
		goto err_fput;
	__set_bit(FR_BACKGROUND, &init_req->flags);

	err = fuse_fill_super_common(sb, &d, &file->private_data);
	if (err < 0)
		goto err_free_init_req;
	/*
	 * atomic_dec_and_test() in fput() provides the necessary
	 * memory barrier for file->private_data to be visible on all
	 * CPUs after this
	 */
	fput(file);
	fuse_send_init(get_fuse_conn_super(sb), init_req);
	return 0;

err_free_init_req:
	fuse_request_free(init_req);
err_fput:
	fput(file);
err:
	return err;
}
M
Miklos Szeredi 已提交
1212

A
Al Viro 已提交
1213
static struct dentry *fuse_mount(struct file_system_type *fs_type,
1214
		       int flags, const char *dev_name,
A
Al Viro 已提交
1215
		       void *raw_data)
M
Miklos Szeredi 已提交
1216
{
A
Al Viro 已提交
1217
	return mount_nodev(fs_type, flags, raw_data, fuse_fill_super);
M
Miklos Szeredi 已提交
1218 1219
}

1220
static void fuse_sb_destroy(struct super_block *sb)
J
John Muir 已提交
1221 1222 1223 1224
{
	struct fuse_conn *fc = get_fuse_conn_super(sb);

	if (fc) {
1225 1226 1227 1228 1229
		fuse_send_destroy(fc);

		fuse_abort_conn(fc, false);
		fuse_wait_aborted(fc);

J
John Muir 已提交
1230 1231 1232 1233
		down_write(&fc->killsb);
		fc->sb = NULL;
		up_write(&fc->killsb);
	}
1234
}
J
John Muir 已提交
1235

1236 1237 1238
/*
 * .kill_sb handler of the "fuse" filesystem type: tear down the fuse
 * side first, then let kill_anon_super() dispose of the superblock.
 */
static void fuse_kill_sb_anon(struct super_block *sb)
{
	fuse_sb_destroy(sb);
	kill_anon_super(sb);
}

1242 1243 1244
static struct file_system_type fuse_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "fuse",
1245
	.fs_flags	= FS_HAS_SUBTYPE | FS_USERNS_MOUNT,
A
Al Viro 已提交
1246
	.mount		= fuse_mount,
J
John Muir 已提交
1247
	.kill_sb	= fuse_kill_sb_anon,
1248
};
1249
MODULE_ALIAS_FS("fuse");
1250 1251

#ifdef CONFIG_BLOCK
A
Al Viro 已提交
1252
static struct dentry *fuse_mount_blk(struct file_system_type *fs_type,
1253
			   int flags, const char *dev_name,
A
Al Viro 已提交
1254
			   void *raw_data)
1255
{
A
Al Viro 已提交
1256
	return mount_bdev(fs_type, flags, dev_name, raw_data, fuse_fill_super);
1257 1258
}

J
John Muir 已提交
1259 1260
/*
 * .kill_sb handler of the "fuseblk" filesystem type: tear down the fuse
 * side first, then let kill_block_super() dispose of the superblock.
 */
static void fuse_kill_sb_blk(struct super_block *sb)
{
	fuse_sb_destroy(sb);
	kill_block_super(sb);
}

1265 1266 1267
static struct file_system_type fuseblk_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "fuseblk",
A
Al Viro 已提交
1268
	.mount		= fuse_mount_blk,
J
John Muir 已提交
1269
	.kill_sb	= fuse_kill_sb_blk,
A
Alexey Dobriyan 已提交
1270
	.fs_flags	= FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
1271
};
1272
MODULE_ALIAS_FS("fuseblk");
1273

1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293
/* Register the "fuseblk" filesystem type; returns 0 or an error code. */
static inline int register_fuseblk(void)
{
	return register_filesystem(&fuseblk_fs_type);
}

/* Undo register_fuseblk(). */
static inline void unregister_fuseblk(void)
{
	unregister_filesystem(&fuseblk_fs_type);
}
#else
/* Without CONFIG_BLOCK there is no "fuseblk" type: always succeeds. */
static inline int register_fuseblk(void)
{
	return 0;
}

/* Without CONFIG_BLOCK there is nothing to unregister. */
static inline void unregister_fuseblk(void)
{
}
#endif

1294
/*
 * Constructor passed to kmem_cache_create() for the fuse inode cache.
 * @foo is the freshly allocated object; it is handed to
 * inode_init_once() as a struct inode (presumably the inode is embedded
 * at the start of struct fuse_inode -- confirm against fuse_i.h).
 */
static void fuse_inode_init_once(void *foo)
{
	inode_init_once(foo);
}

/*
 * Create the fuse inode cache and register the "fuseblk" and "fuse"
 * filesystem types, in that order.
 *
 * Returns 0 on success.  On failure the steps already completed are
 * undone in reverse order and a negative errno is returned.
 */
static int __init fuse_fs_init(void)
{
	int err = -ENOMEM;

	fuse_inode_cachep = kmem_cache_create("fuse_inode",
			sizeof(struct fuse_inode), 0,
			SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT|SLAB_RECLAIM_ACCOUNT,
			fuse_inode_init_once);
	if (!fuse_inode_cachep)
		goto out;

	err = register_fuseblk();
	if (err)
		goto out_destroy_cache;

	err = register_filesystem(&fuse_fs_type);
	if (err)
		goto out_unregister_blk;

	return 0;

 out_unregister_blk:
	unregister_fuseblk();
 out_destroy_cache:
	kmem_cache_destroy(fuse_inode_cachep);
 out:
	return err;
}

static void fuse_fs_cleanup(void)
{
	unregister_filesystem(&fuse_fs_type);
1334
	unregister_fuseblk();
1335 1336 1337 1338 1339 1340

	/*
	 * Make sure all delayed rcu free inodes are flushed before we
	 * destroy cache.
	 */
	rcu_barrier();
M
Miklos Szeredi 已提交
1341 1342 1343
	kmem_cache_destroy(fuse_inode_cachep);
}

1344 1345
static struct kobject *fuse_kobj;

1346 1347 1348 1349
static int fuse_sysfs_init(void)
{
	int err;

1350
	fuse_kobj = kobject_create_and_add("fuse", fs_kobj);
1351 1352
	if (!fuse_kobj) {
		err = -ENOMEM;
1353
		goto out_err;
1354
	}
1355

1356 1357
	err = sysfs_create_mount_point(fuse_kobj, "connections");
	if (err)
1358 1359 1360 1361 1362
		goto out_fuse_unregister;

	return 0;

 out_fuse_unregister:
1363
	kobject_put(fuse_kobj);
1364 1365 1366 1367 1368 1369
 out_err:
	return err;
}

static void fuse_sysfs_cleanup(void)
{
1370
	sysfs_remove_mount_point(fuse_kobj, "connections");
1371
	kobject_put(fuse_kobj);
1372 1373
}

M
Miklos Szeredi 已提交
1374 1375 1376 1377
/*
 * Module initialisation: bring up the filesystem, device, sysfs and
 * control parts in turn, then sanitize the two global limits.
 *
 * Returns 0 on success.  If a step fails, the steps that already
 * succeeded are cleaned up in reverse order and its error is returned.
 */
static int __init fuse_init(void)
{
	int ret;

	printk(KERN_INFO "fuse init (API version %i.%i)\n",
	       FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION);

	INIT_LIST_HEAD(&fuse_conn_list);

	ret = fuse_fs_init();
	if (ret)
		goto err;

	ret = fuse_dev_init();
	if (ret)
		goto err_fs_cleanup;

	ret = fuse_sysfs_init();
	if (ret)
		goto err_dev_cleanup;

	ret = fuse_ctl_init();
	if (ret)
		goto err_sysfs_cleanup;

	sanitize_global_limit(&max_user_bgreq);
	sanitize_global_limit(&max_user_congthresh);

	return 0;

 err_sysfs_cleanup:
	fuse_sysfs_cleanup();
 err_dev_cleanup:
	fuse_dev_cleanup();
 err_fs_cleanup:
	fuse_fs_cleanup();
 err:
	return ret;
}

/*
 * Module exit: shut down the control, sysfs, filesystem and device
 * parts, in that order.
 */
static void __exit fuse_exit(void)
{
	printk(KERN_DEBUG "fuse exit\n");

	fuse_ctl_cleanup();
	fuse_sysfs_cleanup();
	fuse_fs_cleanup();
	fuse_dev_cleanup();
}

/* Hook fuse_init() and fuse_exit() up as the module's init and exit handlers. */
module_init(fuse_init);
module_exit(fuse_exit);