inode.c 33.5 KB
Newer Older
M
Miklos Szeredi 已提交
1 2
/*
  FUSE: Filesystem in Userspace
M
Miklos Szeredi 已提交
3
  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
M
Miklos Szeredi 已提交
4 5 6 7 8 9 10 11 12 13 14 15 16

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/

#include "fuse_i.h"

#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/module.h>
17
#include <linux/moduleparam.h>
M
Miklos Szeredi 已提交
18 19
#include <linux/parser.h>
#include <linux/statfs.h>
20
#include <linux/random.h>
A
Alexey Dobriyan 已提交
21
#include <linux/sched.h>
M
Miklos Szeredi 已提交
22
#include <linux/exportfs.h>
S
Seth Forshee 已提交
23
#include <linux/posix_acl.h>
24
#include <linux/pid_namespace.h>
M
Miklos Szeredi 已提交
25 26 27 28 29

MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
MODULE_DESCRIPTION("Filesystem in Userspace");
MODULE_LICENSE("GPL");

30
static struct kmem_cache *fuse_inode_cachep;
31 32
struct list_head fuse_conn_list;
DEFINE_MUTEX(fuse_mutex);
M
Miklos Szeredi 已提交
33

34
static int set_global_limit(const char *val, const struct kernel_param *kp);
35

36
unsigned max_user_bgreq;
37 38 39 40 41 42 43
module_param_call(max_user_bgreq, set_global_limit, param_get_uint,
		  &max_user_bgreq, 0644);
__MODULE_PARM_TYPE(max_user_bgreq, "uint");
MODULE_PARM_DESC(max_user_bgreq,
 "Global limit for the maximum number of backgrounded requests an "
 "unprivileged user can set");

44
unsigned max_user_congthresh;
45 46 47 48 49 50 51
module_param_call(max_user_congthresh, set_global_limit, param_get_uint,
		  &max_user_congthresh, 0644);
__MODULE_PARM_TYPE(max_user_congthresh, "uint");
MODULE_PARM_DESC(max_user_congthresh,
 "Global limit for the maximum congestion threshold an "
 "unprivileged user can set");

M
Miklos Szeredi 已提交
52 53
#define FUSE_SUPER_MAGIC 0x65735546

M
Miklos Szeredi 已提交
54 55
#define FUSE_DEFAULT_BLKSIZE 512

56 57 58 59 60 61
/** Maximum number of outstanding background requests */
#define FUSE_DEFAULT_MAX_BACKGROUND 12

/** Congestion starts at 75% of maximum */
#define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4)

M
Miklos Szeredi 已提交
62 63 64
struct fuse_mount_data {
	int fd;
	unsigned rootmode;
65 66
	kuid_t user_id;
	kgid_t group_id;
M
Miklos Szeredi 已提交
67 68 69 70
	unsigned fd_present:1;
	unsigned rootmode_present:1;
	unsigned user_id_present:1;
	unsigned group_id_present:1;
M
Miklos Szeredi 已提交
71 72
	unsigned default_permissions:1;
	unsigned allow_other:1;
73
	unsigned max_read;
M
Miklos Szeredi 已提交
74
	unsigned blksize;
M
Miklos Szeredi 已提交
75 76
};

77
struct fuse_forget_link *fuse_alloc_forget(void)
78 79 80 81
{
	return kzalloc(sizeof(struct fuse_forget_link), GFP_KERNEL);
}

M
Miklos Szeredi 已提交
82 83 84 85
static struct inode *fuse_alloc_inode(struct super_block *sb)
{
	struct fuse_inode *fi;

Z
zhangliguang 已提交
86 87
	fi = kmem_cache_alloc(fuse_inode_cachep, GFP_KERNEL);
	if (!fi)
M
Miklos Szeredi 已提交
88 89
		return NULL;

M
Miklos Szeredi 已提交
90
	fi->i_time = 0;
91
	fi->inval_mask = 0;
M
Miklos Szeredi 已提交
92
	fi->nodeid = 0;
93
	fi->nlookup = 0;
94
	fi->attr_version = 0;
95
	fi->orig_ino = 0;
96
	fi->state = 0;
97
	mutex_init(&fi->mutex);
98
	spin_lock_init(&fi->lock);
99 100
	fi->forget = fuse_alloc_forget();
	if (!fi->forget) {
Z
zhangliguang 已提交
101
		kmem_cache_free(fuse_inode_cachep, fi);
102 103
		return NULL;
	}
M
Miklos Szeredi 已提交
104

Z
zhangliguang 已提交
105
	return &fi->inode;
M
Miklos Szeredi 已提交
106 107
}

A
Al Viro 已提交
108
static void fuse_free_inode(struct inode *inode)
M
Miklos Szeredi 已提交
109
{
110
	struct fuse_inode *fi = get_fuse_inode(inode);
A
Al Viro 已提交
111

112
	mutex_destroy(&fi->mutex);
113
	kfree(fi->forget);
A
Al Viro 已提交
114
	kmem_cache_free(fuse_inode_cachep, fi);
M
Miklos Szeredi 已提交
115 116
}

117
static void fuse_evict_inode(struct inode *inode)
M
Miklos Szeredi 已提交
118
{
A
Al Viro 已提交
119 120
	struct fuse_inode *fi = get_fuse_inode(inode);

121
	truncate_inode_pages_final(&inode->i_data);
122
	clear_inode(inode);
123
	if (inode->i_sb->s_flags & SB_ACTIVE) {
M
Miklos Szeredi 已提交
124
		struct fuse_conn *fc = get_fuse_conn(inode);
125 126
		fuse_queue_forget(fc, fi->forget, fi->nodeid, fi->nlookup);
		fi->forget = NULL;
127
	}
A
Al Viro 已提交
128 129 130 131
	if (S_ISREG(inode->i_mode) && !is_bad_inode(inode)) {
		WARN_ON(!list_empty(&fi->write_files));
		WARN_ON(!list_empty(&fi->queued_writes));
	}
M
Miklos Szeredi 已提交
132 133
}

134 135
static int fuse_remount_fs(struct super_block *sb, int *flags, char *data)
{
136
	sync_filesystem(sb);
137
	if (*flags & SB_MANDLOCK)
138 139 140 141 142
		return -EINVAL;

	return 0;
}

143 144 145 146 147 148 149 150 151 152 153 154
/*
 * ino_t is only 32 bits wide on 32-bit architectures.  When that is the
 * case, fold the bits that do not fit into the low part with XOR so the
 * 64-bit value is squashed down instead of simply truncated.
 */
static ino_t fuse_squash_ino(u64 ino64)
{
	const unsigned int lost_bits = (sizeof(u64) - sizeof(ino_t)) * 8;
	ino_t folded = (ino_t) ino64;

	/* ino_t is wide enough: nothing to fold */
	if (sizeof(ino_t) >= sizeof(u64))
		return folded;

	return folded ^ (ino_t) (ino64 >> lost_bits);
}

M
Miklos Szeredi 已提交
155 156
/*
 * Copy the attributes in @attr into @inode and remember @attr_valid as the
 * new attribute timeout.  i_size is not touched here.
 *
 * Must be called with fi->lock held (asserted via lockdep).
 */
void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
				   u64 attr_valid)
{
	struct fuse_conn *conn = get_fuse_conn(inode);
	struct fuse_inode *finode = get_fuse_inode(inode);

	lockdep_assert_held(&finode->lock);

	/* bookkeeping: new attribute version, timeout, nothing invalidated */
	finode->attr_version = atomic64_inc_return(&conn->attr_version);
	finode->i_time = attr_valid;
	WRITE_ONCE(finode->inval_mask, 0);

	inode->i_ino = fuse_squash_ino(attr->ino);
	/* keep the file type, take the permission bits from the server */
	inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
	set_nlink(inode, attr->nlink);
	inode->i_uid = make_kuid(conn->user_ns, attr->uid);
	inode->i_gid = make_kgid(conn->user_ns, attr->gid);
	inode->i_blocks = attr->blocks;
	inode->i_atime.tv_sec = attr->atime;
	inode->i_atime.tv_nsec = attr->atimensec;

	/* mtime from server may be stale due to local buffered write */
	if (!(conn->writeback_cache && S_ISREG(inode->i_mode))) {
		inode->i_mtime.tv_sec = attr->mtime;
		inode->i_mtime.tv_nsec = attr->mtimensec;
		inode->i_ctime.tv_sec = attr->ctime;
		inode->i_ctime.tv_nsec = attr->ctimensec;
	}

	/* a zero blksize means: fall back to the superblock's block size */
	inode->i_blkbits = attr->blksize != 0 ?
		ilog2(attr->blksize) : inode->i_sb->s_blocksize_bits;

	/*
	 * Don't set the sticky bit in i_mode, unless we want the VFS
	 * to check permissions.  This prevents failures due to the
	 * check in may_delete().  The unmodified mode is kept in
	 * orig_i_mode.
	 */
	finode->orig_i_mode = inode->i_mode;
	if (!conn->default_permissions)
		inode->i_mode &= ~S_ISVTX;

	finode->orig_ino = attr->ino;
}

/*
 * Apply attributes received from the server to @inode, including i_size,
 * and bring the page cache in line with them.
 *
 * The update is dropped when @attr_version is non-zero and older than the
 * version already stored in the inode, or while FUSE_I_SIZE_UNSTABLE is
 * set.
 */
void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
			    u64 attr_valid, u64 attr_version)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	bool is_wb = fc->writeback_cache;
	struct timespec64 old_mtime;
	loff_t oldsize;
	bool need_inval = false;

	spin_lock(&fi->lock);
	if ((attr_version != 0 && fi->attr_version > attr_version) ||
	    test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) {
		spin_unlock(&fi->lock);
		return;
	}

	old_mtime = inode->i_mtime;
	fuse_change_attributes_common(inode, attr, attr_valid);

	oldsize = inode->i_size;
	/*
	 * In case of writeback_cache enabled, the cached writes beyond EOF
	 * extend local i_size without keeping userspace server in sync. So,
	 * attr->size coming from server can be stale. We cannot trust it.
	 */
	if (!is_wb || !S_ISREG(inode->i_mode))
		i_size_write(inode, attr->size);
	spin_unlock(&fi->lock);

	/* page cache handling is only done for regular files without writeback cache */
	if (is_wb || !S_ISREG(inode->i_mode))
		return;

	if (oldsize != attr->size) {
		truncate_pagecache(inode, attr->size);
		need_inval = !fc->explicit_inval_data;
	} else if (fc->auto_inval_data) {
		struct timespec64 new_mtime = {
			.tv_sec = attr->mtime,
			.tv_nsec = attr->mtimensec,
		};

		/*
		 * Auto inval mode also checks and invalidates if mtime
		 * has changed.
		 */
		need_inval = !timespec64_equal(&old_mtime, &new_mtime);
	}

	if (need_inval)
		invalidate_inode_pages2(inode->i_mapping);
}

static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
{
	inode->i_mode = attr->mode & S_IFMT;
M
Miklos Szeredi 已提交
258
	inode->i_size = attr->size;
M
Maxim Patlasov 已提交
259 260
	inode->i_mtime.tv_sec  = attr->mtime;
	inode->i_mtime.tv_nsec = attr->mtimensec;
M
Maxim Patlasov 已提交
261 262
	inode->i_ctime.tv_sec  = attr->ctime;
	inode->i_ctime.tv_nsec = attr->ctimensec;
263 264
	if (S_ISREG(inode->i_mode)) {
		fuse_init_common(inode);
M
Miklos Szeredi 已提交
265
		fuse_init_file_inode(inode);
266 267 268 269 270 271 272 273 274
	} else if (S_ISDIR(inode->i_mode))
		fuse_init_dir(inode);
	else if (S_ISLNK(inode->i_mode))
		fuse_init_symlink(inode);
	else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
		 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
		fuse_init_common(inode);
		init_special_inode(inode, inode->i_mode,
				   new_decode_dev(attr->rdev));
275 276
	} else
		BUG();
M
Miklos Szeredi 已提交
277 278
}

J
John Muir 已提交
279
int fuse_inode_eq(struct inode *inode, void *_nodeidp)
M
Miklos Szeredi 已提交
280
{
M
Miklos Szeredi 已提交
281
	u64 nodeid = *(u64 *) _nodeidp;
M
Miklos Szeredi 已提交
282 283 284 285 286 287 288 289
	if (get_node_id(inode) == nodeid)
		return 1;
	else
		return 0;
}

static int fuse_inode_set(struct inode *inode, void *_nodeidp)
{
M
Miklos Szeredi 已提交
290
	u64 nodeid = *(u64 *) _nodeidp;
M
Miklos Szeredi 已提交
291 292 293 294
	get_fuse_inode(inode)->nodeid = nodeid;
	return 0;
}

M
Miklos Szeredi 已提交
295
/*
 * Find or create the inode for @nodeid, bump its lookup count and apply
 * the attributes in @attr.
 *
 * An existing inode whose file type no longer matches @attr is marked bad
 * and dropped, and the lookup is repeated.  Returns NULL when
 * iget5_locked() fails.
 */
struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
			int generation, struct fuse_attr *attr,
			u64 attr_valid, u64 attr_version)
{
	struct fuse_conn *fc = get_fuse_conn_super(sb);
	struct fuse_inode *fi;
	struct inode *inode;

	for (;;) {
		inode = iget5_locked(sb, nodeid, fuse_inode_eq, fuse_inode_set,
				     &nodeid);
		if (!inode)
			return NULL;

		if (inode->i_state & I_NEW) {
			/* freshly allocated: finish initialisation */
			inode->i_flags |= S_NOATIME;
			if (!fc->writeback_cache || !S_ISREG(attr->mode))
				inode->i_flags |= S_NOCMTIME;
			inode->i_generation = generation;
			fuse_init_inode(inode, attr);
			unlock_new_inode(inode);
			break;
		}

		/* cached inode of the same type can be used as is */
		if (!((inode->i_mode ^ attr->mode) & S_IFMT))
			break;

		/* Inode has changed type, any I/O on the old should fail */
		make_bad_inode(inode);
		iput(inode);
	}

	fi = get_fuse_inode(inode);
	spin_lock(&fi->lock);
	fi->nlookup++;
	spin_unlock(&fi->lock);

	fuse_change_attributes(inode, attr, attr_valid, attr_version);

	return inode;
}

J
John Muir 已提交
331 332 333 334 335 336 337 338 339 340 341 342
/*
 * Invalidate cached state of the inode with node id @nodeid: attributes
 * and cached ACLs always, and page cache pages when @offset is not
 * negative.  A @len of zero or less extends the range to the last
 * possible page index.
 *
 * Returns 0, or -ENOENT if no such inode is found by ilookup5().
 */
int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
			     loff_t offset, loff_t len)
{
	struct inode *inode = ilookup5(sb, nodeid, fuse_inode_eq, &nodeid);

	if (!inode)
		return -ENOENT;

	fuse_invalidate_attr(inode);
	forget_all_cached_acls(inode);

	if (offset >= 0) {
		pgoff_t first = offset >> PAGE_SHIFT;
		pgoff_t last = -1;

		if (len > 0)
			last = (offset + len - 1) >> PAGE_SHIFT;

		invalidate_inode_pages2_range(inode->i_mapping, first, last);
	}

	iput(inode);
	return 0;
}

357
bool fuse_lock_inode(struct inode *inode)
358
{
359 360 361
	bool locked = false;

	if (!get_fuse_conn(inode)->parallel_dirops) {
362
		mutex_lock(&get_fuse_inode(inode)->mutex);
363 364 365 366
		locked = true;
	}

	return locked;
367 368
}

369
/*
 * Release the fuse inode mutex, but only if @locked says it was taken.
 */
void fuse_unlock_inode(struct inode *inode, bool locked)
{
	if (!locked)
		return;

	mutex_unlock(&get_fuse_inode(inode)->mutex);
}

375
/*
 * ->umount_begin: abort the connection belonging to this superblock.
 */
static void fuse_umount_begin(struct super_block *sb)
{
	struct fuse_conn *fc = get_fuse_conn_super(sb);

	fuse_abort_conn(fc);
}

380 381 382 383 384 385
static void fuse_send_destroy(struct fuse_conn *fc)
{
	struct fuse_req *req = fc->destroy_req;
	if (req && fc->conn_init) {
		fc->destroy_req = NULL;
		req->in.h.opcode = FUSE_DESTROY;
M
Miklos Szeredi 已提交
386 387
		__set_bit(FR_FORCE, &req->flags);
		__clear_bit(FR_BACKGROUND, &req->flags);
388
		fuse_request_send(fc, req);
389 390 391 392
		fuse_put_request(fc, req);
	}
}

393 394 395 396
/*
 * ->put_super: under fuse_mutex, unlink the connection from its list and
 * remove its control filesystem entry; then drop a connection reference.
 */
static void fuse_put_super(struct super_block *sb)
{
	struct fuse_conn *conn;

	conn = get_fuse_conn_super(sb);

	mutex_lock(&fuse_mutex);
	list_del(&conn->entry);
	fuse_ctl_remove_conn(conn);
	mutex_unlock(&fuse_mutex);

	fuse_conn_put(conn);
}

405 406 407 408
/*
 * Translate a statfs reply from the server into a struct kstatfs.
 * f_type is always FUSE_SUPER_MAGIC; the fsid is left untouched here.
 */
static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr)
{
	stbuf->f_type = FUSE_SUPER_MAGIC;

	/* block geometry and usage */
	stbuf->f_bsize = attr->bsize;
	stbuf->f_frsize = attr->frsize;
	stbuf->f_blocks = attr->blocks;
	stbuf->f_bfree = attr->bfree;
	stbuf->f_bavail = attr->bavail;

	/* inode usage */
	stbuf->f_files = attr->files;
	stbuf->f_ffree = attr->ffree;

	stbuf->f_namelen = attr->namelen;
	/* fsid is left zero */
}

419
static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
420
{
421
	struct super_block *sb = dentry->d_sb;
422
	struct fuse_conn *fc = get_fuse_conn_super(sb);
423
	FUSE_ARGS(args);
424 425 426
	struct fuse_statfs_out outarg;
	int err;

427
	if (!fuse_allow_current_process(fc)) {
M
Miklos Szeredi 已提交
428 429 430 431
		buf->f_type = FUSE_SUPER_MAGIC;
		return 0;
	}

432
	memset(&outarg, 0, sizeof(outarg));
433 434
	args.in.numargs = 0;
	args.in.h.opcode = FUSE_STATFS;
435
	args.in.h.nodeid = get_node_id(d_inode(dentry));
436
	args.out.numargs = 1;
437
	args.out.args[0].size = sizeof(outarg);
438 439
	args.out.args[0].value = &outarg;
	err = fuse_simple_request(fc, &args);
440 441 442 443 444
	if (!err)
		convert_fuse_statfs(buf, &outarg.st);
	return err;
}

M
Miklos Szeredi 已提交
445 446 447 448
/* Token ids for the mount options listed in the tokens[] match table. */
enum {
	/* options carrying a value */
	OPT_FD,
	OPT_ROOTMODE,
	OPT_USER_ID,
	OPT_GROUP_ID,
	/* flag options */
	OPT_DEFAULT_PERMISSIONS,
	OPT_ALLOW_OTHER,
	/* further options carrying a value */
	OPT_MAX_READ,
	OPT_BLKSIZE,
	/* table terminator / no match */
	OPT_ERR
};

457
static const match_table_t tokens = {
M
Miklos Szeredi 已提交
458 459 460
	{OPT_FD,			"fd=%u"},
	{OPT_ROOTMODE,			"rootmode=%o"},
	{OPT_USER_ID,			"user_id=%u"},
461
	{OPT_GROUP_ID,			"group_id=%u"},
M
Miklos Szeredi 已提交
462 463
	{OPT_DEFAULT_PERMISSIONS,	"default_permissions"},
	{OPT_ALLOW_OTHER,		"allow_other"},
464
	{OPT_MAX_READ,			"max_read=%u"},
M
Miklos Szeredi 已提交
465
	{OPT_BLKSIZE,			"blksize=%u"},
M
Miklos Szeredi 已提交
466 467 468
	{OPT_ERR,			NULL}
};

469 470 471 472 473 474 475 476 477 478 479
/*
 * Parse the matched substring @s as a base-10 unsigned int into @res.
 *
 * Returns -ENOMEM if the substring cannot be duplicated, otherwise the
 * result of kstrtouint().
 */
static int fuse_match_uint(substring_t *s, unsigned int *res)
{
	char *text = match_strdup(s);
	int ret;

	if (!text)
		return -ENOMEM;

	ret = kstrtouint(text, 10, res);
	kfree(text);
	return ret;
}

480 481
static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev,
			  struct user_namespace *user_ns)
M
Miklos Szeredi 已提交
482 483 484
{
	char *p;
	memset(d, 0, sizeof(struct fuse_mount_data));
485
	d->max_read = ~0;
M
Miklos Szeredi 已提交
486
	d->blksize = FUSE_DEFAULT_BLKSIZE;
M
Miklos Szeredi 已提交
487 488 489 490

	while ((p = strsep(&opt, ",")) != NULL) {
		int token;
		int value;
491
		unsigned uv;
M
Miklos Szeredi 已提交
492 493 494 495 496 497 498 499 500 501
		substring_t args[MAX_OPT_ARGS];
		if (!*p)
			continue;

		token = match_token(p, tokens, args);
		switch (token) {
		case OPT_FD:
			if (match_int(&args[0], &value))
				return 0;
			d->fd = value;
502
			d->fd_present = 1;
M
Miklos Szeredi 已提交
503 504 505 506 507
			break;

		case OPT_ROOTMODE:
			if (match_octal(&args[0], &value))
				return 0;
508 509
			if (!fuse_valid_type(value))
				return 0;
M
Miklos Szeredi 已提交
510
			d->rootmode = value;
511
			d->rootmode_present = 1;
M
Miklos Szeredi 已提交
512 513 514
			break;

		case OPT_USER_ID:
515
			if (fuse_match_uint(&args[0], &uv))
M
Miklos Szeredi 已提交
516
				return 0;
517
			d->user_id = make_kuid(user_ns, uv);
518 519
			if (!uid_valid(d->user_id))
				return 0;
520
			d->user_id_present = 1;
M
Miklos Szeredi 已提交
521 522
			break;

523
		case OPT_GROUP_ID:
524
			if (fuse_match_uint(&args[0], &uv))
525
				return 0;
526
			d->group_id = make_kgid(user_ns, uv);
527 528
			if (!gid_valid(d->group_id))
				return 0;
529
			d->group_id_present = 1;
530 531
			break;

M
Miklos Szeredi 已提交
532
		case OPT_DEFAULT_PERMISSIONS:
M
Miklos Szeredi 已提交
533
			d->default_permissions = 1;
M
Miklos Szeredi 已提交
534 535 536
			break;

		case OPT_ALLOW_OTHER:
M
Miklos Szeredi 已提交
537
			d->allow_other = 1;
M
Miklos Szeredi 已提交
538 539
			break;

540 541 542 543 544 545
		case OPT_MAX_READ:
			if (match_int(&args[0], &value))
				return 0;
			d->max_read = value;
			break;

M
Miklos Szeredi 已提交
546 547 548 549 550 551
		case OPT_BLKSIZE:
			if (!is_bdev || match_int(&args[0], &value))
				return 0;
			d->blksize = value;
			break;

M
Miklos Szeredi 已提交
552 553 554 555
		default:
			return 0;
		}
	}
556 557 558

	if (!d->fd_present || !d->rootmode_present ||
	    !d->user_id_present || !d->group_id_present)
M
Miklos Szeredi 已提交
559 560 561 562 563
		return 0;

	return 1;
}

564
static int fuse_show_options(struct seq_file *m, struct dentry *root)
M
Miklos Szeredi 已提交
565
{
566 567
	struct super_block *sb = root->d_sb;
	struct fuse_conn *fc = get_fuse_conn_super(sb);
M
Miklos Szeredi 已提交
568

569 570
	seq_printf(m, ",user_id=%u", from_kuid_munged(fc->user_ns, fc->user_id));
	seq_printf(m, ",group_id=%u", from_kgid_munged(fc->user_ns, fc->group_id));
M
Miklos Szeredi 已提交
571
	if (fc->default_permissions)
M
Miklos Szeredi 已提交
572
		seq_puts(m, ",default_permissions");
M
Miklos Szeredi 已提交
573
	if (fc->allow_other)
M
Miklos Szeredi 已提交
574
		seq_puts(m, ",allow_other");
575 576
	if (fc->max_read != ~0)
		seq_printf(m, ",max_read=%u", fc->max_read);
577 578
	if (sb->s_bdev && sb->s_blocksize != FUSE_DEFAULT_BLKSIZE)
		seq_printf(m, ",blksize=%lu", sb->s_blocksize);
M
Miklos Szeredi 已提交
579 580 581
	return 0;
}

M
Miklos Szeredi 已提交
582 583 584 585 586 587 588
/*
 * Reset an input queue: zero the structure, set up its wait queue and its
 * pending/interrupt lists, point the forget list tail at its head and mark
 * the queue connected.
 */
static void fuse_iqueue_init(struct fuse_iqueue *fiq)
{
	memset(fiq, 0, sizeof(struct fuse_iqueue));

	INIT_LIST_HEAD(&fiq->pending);
	INIT_LIST_HEAD(&fiq->interrupts);
	init_waitqueue_head(&fiq->waitq);

	/* empty forget list: the tail refers to the head element */
	fiq->forget_list_tail = &fiq->forget_list_head;
	fiq->connected = 1;
}

592 593
static void fuse_pqueue_init(struct fuse_pqueue *fpq)
{
594 595
	unsigned int i;

M
Miklos Szeredi 已提交
596
	spin_lock_init(&fpq->lock);
597 598
	for (i = 0; i < FUSE_PQ_HASH_SIZE; i++)
		INIT_LIST_HEAD(&fpq->processing[i]);
599
	INIT_LIST_HEAD(&fpq->io);
600
	fpq->connected = 1;
601 602
}

603
void fuse_conn_init(struct fuse_conn *fc, struct user_namespace *user_ns)
M
Miklos Szeredi 已提交
604
{
605 606
	memset(fc, 0, sizeof(*fc));
	spin_lock_init(&fc->lock);
K
Kirill Tkhai 已提交
607
	spin_lock_init(&fc->bg_lock);
J
John Muir 已提交
608
	init_rwsem(&fc->killsb);
609
	refcount_set(&fc->count, 1);
610
	atomic_set(&fc->dev_count, 1);
611 612
	init_waitqueue_head(&fc->blocked_waitq);
	init_waitqueue_head(&fc->reserved_req_waitq);
M
Miklos Szeredi 已提交
613
	fuse_iqueue_init(&fc->iq);
614 615
	INIT_LIST_HEAD(&fc->bg_queue);
	INIT_LIST_HEAD(&fc->entry);
616
	INIT_LIST_HEAD(&fc->devices);
617
	atomic_set(&fc->num_waiting, 0);
618 619
	fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND;
	fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD;
M
Miklos Szeredi 已提交
620
	atomic64_set(&fc->khctr, 0);
621
	fc->polled_files = RB_ROOT;
622
	fc->blocked = 0;
M
Maxim Patlasov 已提交
623
	fc->initialized = 0;
624
	fc->connected = 1;
625
	atomic64_set(&fc->attr_version, 1);
626
	get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
627
	fc->pid_ns = get_pid_ns(task_active_pid_ns(current));
628
	fc->user_ns = get_user_ns(user_ns);
M
Miklos Szeredi 已提交
629
	fc->max_pages = FUSE_DEFAULT_MAX_PAGES_PER_REQ;
M
Miklos Szeredi 已提交
630
}
631
EXPORT_SYMBOL_GPL(fuse_conn_init);
M
Miklos Szeredi 已提交
632

633 634
void fuse_conn_put(struct fuse_conn *fc)
{
635
	if (refcount_dec_and_test(&fc->count)) {
636 637
		if (fc->destroy_req)
			fuse_request_free(fc->destroy_req);
638
		put_pid_ns(fc->pid_ns);
639
		put_user_ns(fc->user_ns);
T
Tejun Heo 已提交
640
		fc->release(fc);
641
	}
642
}
643
EXPORT_SYMBOL_GPL(fuse_conn_put);
644 645 646

struct fuse_conn *fuse_conn_get(struct fuse_conn *fc)
{
647
	refcount_inc(&fc->count);
648 649
	return fc;
}
650
EXPORT_SYMBOL_GPL(fuse_conn_get);
651

652
static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode)
M
Miklos Szeredi 已提交
653 654 655 656 657 658
{
	struct fuse_attr attr;
	memset(&attr, 0, sizeof(attr));

	attr.mode = mode;
	attr.ino = FUSE_ROOT_ID;
659
	attr.nlink = 1;
660
	return fuse_iget(sb, 1, 0, &attr, 0, 0);
M
Miklos Szeredi 已提交
661 662
}

M
Miklos Szeredi 已提交
663
struct fuse_inode_handle {
M
Miklos Szeredi 已提交
664 665 666 667 668 669 670
	u64 nodeid;
	u32 generation;
};

static struct dentry *fuse_get_dentry(struct super_block *sb,
				      struct fuse_inode_handle *handle)
{
671
	struct fuse_conn *fc = get_fuse_conn_super(sb);
M
Miklos Szeredi 已提交
672 673 674 675 676 677 678 679
	struct inode *inode;
	struct dentry *entry;
	int err = -ESTALE;

	if (handle->nodeid == 0)
		goto out_err;

	inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid);
680 681
	if (!inode) {
		struct fuse_entry_out outarg;
A
Al Viro 已提交
682
		const struct qstr name = QSTR_INIT(".", 1);
683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698

		if (!fc->export_support)
			goto out_err;

		err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg,
				       &inode);
		if (err && err != -ENOENT)
			goto out_err;
		if (err || !inode) {
			err = -ESTALE;
			goto out_err;
		}
		err = -EIO;
		if (get_node_id(inode) != handle->nodeid)
			goto out_iput;
	}
M
Miklos Szeredi 已提交
699 700 701 702
	err = -ESTALE;
	if (inode->i_generation != handle->generation)
		goto out_iput;

703
	entry = d_obtain_alias(inode);
A
Al Viro 已提交
704
	if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID)
M
Miklos Szeredi 已提交
705 706 707 708 709 710 711 712 713 714
		fuse_invalidate_entry_cache(entry);

	return entry;

 out_iput:
	iput(inode);
 out_err:
	return ERR_PTR(err);
}

A
Al Viro 已提交
715 716
/*
 * ->encode_fh: write a file handle for @inode, and for @parent when given,
 * into @fh.
 *
 * Each inode takes three 32-bit words: node id high half, node id low
 * half, generation.  *max_len is in 32-bit words; on return it holds the
 * number of words needed (3, or 6 with a parent).
 *
 * Returns FILEID_INVALID if the buffer is too small, otherwise the handle
 * type: 0x81 without parent, 0x82 with parent.
 */
static int fuse_encode_fh(struct inode *inode, u32 *fh, int *max_len,
			   struct inode *parent)
{
	int words;
	int type;
	u64 id;

	if (parent) {
		words = 6;
		type = 0x82;
	} else {
		words = 3;
		type = 0x81;
	}

	if (*max_len < words) {
		*max_len = words;
		return FILEID_INVALID;
	}

	id = get_fuse_inode(inode)->nodeid;
	fh[0] = (u32)(id >> 32);
	fh[1] = (u32)(id & 0xffffffff);
	fh[2] = inode->i_generation;

	if (parent) {
		id = get_fuse_inode(parent)->nodeid;
		fh[3] = (u32)(id >> 32);
		fh[4] = (u32)(id & 0xffffffff);
		fh[5] = parent->i_generation;
	}

	*max_len = words;
	return type;
}

static struct dentry *fuse_fh_to_dentry(struct super_block *sb,
		struct fid *fid, int fh_len, int fh_type)
{
	struct fuse_inode_handle handle;

	if ((fh_type != 0x81 && fh_type != 0x82) || fh_len < 3)
		return NULL;

	handle.nodeid = (u64) fid->raw[0] << 32;
	handle.nodeid |= (u64) fid->raw[1];
	handle.generation = fid->raw[2];
	return fuse_get_dentry(sb, &handle);
}

static struct dentry *fuse_fh_to_parent(struct super_block *sb,
		struct fid *fid, int fh_len, int fh_type)
{
	struct fuse_inode_handle parent;

	if (fh_type != 0x82 || fh_len < 6)
		return NULL;

	parent.nodeid = (u64) fid->raw[3] << 32;
	parent.nodeid |= (u64) fid->raw[4];
	parent.generation = fid->raw[5];
	return fuse_get_dentry(sb, &parent);
}

775 776
static struct dentry *fuse_get_parent(struct dentry *child)
{
777
	struct inode *child_inode = d_inode(child);
778 779 780 781
	struct fuse_conn *fc = get_fuse_conn(child_inode);
	struct inode *inode;
	struct dentry *parent;
	struct fuse_entry_out outarg;
A
Al Viro 已提交
782
	const struct qstr name = QSTR_INIT("..", 2);
783 784 785 786 787 788 789
	int err;

	if (!fc->export_support)
		return ERR_PTR(-ESTALE);

	err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode),
			       &name, &outarg, &inode);
790 791 792
	if (err) {
		if (err == -ENOENT)
			return ERR_PTR(-ESTALE);
793 794
		return ERR_PTR(err);
	}
795 796

	parent = d_obtain_alias(inode);
A
Al Viro 已提交
797
	if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID)
798 799 800 801
		fuse_invalidate_entry_cache(parent);

	return parent;
}
M
Miklos Szeredi 已提交
802 803 804 805 806

static const struct export_operations fuse_export_operations = {
	.fh_to_dentry	= fuse_fh_to_dentry,
	.fh_to_parent	= fuse_fh_to_parent,
	.encode_fh	= fuse_encode_fh,
807
	.get_parent	= fuse_get_parent,
M
Miklos Szeredi 已提交
808 809
};

810
static const struct super_operations fuse_super_operations = {
M
Miklos Szeredi 已提交
811
	.alloc_inode    = fuse_alloc_inode,
A
Al Viro 已提交
812
	.free_inode     = fuse_free_inode,
813
	.evict_inode	= fuse_evict_inode,
M
Miklos Szeredi 已提交
814
	.write_inode	= fuse_write_inode,
M
Miklos Szeredi 已提交
815
	.drop_inode	= generic_delete_inode,
816
	.remount_fs	= fuse_remount_fs,
M
Miklos Szeredi 已提交
817
	.put_super	= fuse_put_super,
818
	.umount_begin	= fuse_umount_begin,
819
	.statfs		= fuse_statfs,
M
Miklos Szeredi 已提交
820 821 822
	.show_options	= fuse_show_options,
};

823 824 825
static void sanitize_global_limit(unsigned *limit)
{
	if (*limit == 0)
826
		*limit = ((totalram_pages() << PAGE_SHIFT) >> 13) /
827 828 829 830 831 832
			 sizeof(struct fuse_req);

	if (*limit >= 1 << 16)
		*limit = (1 << 16) - 1;
}

833
static int set_global_limit(const char *val, const struct kernel_param *kp)
834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855
{
	int rv;

	rv = param_set_uint(val, kp);
	if (rv)
		return rv;

	sanitize_global_limit((unsigned *)kp->arg);

	return 0;
}

static void process_init_limits(struct fuse_conn *fc, struct fuse_init_out *arg)
{
	int cap_sys_admin = capable(CAP_SYS_ADMIN);

	if (arg->minor < 13)
		return;

	sanitize_global_limit(&max_user_bgreq);
	sanitize_global_limit(&max_user_congthresh);

K
Kirill Tkhai 已提交
856
	spin_lock(&fc->bg_lock);
857 858 859 860 861 862 863 864 865 866 867 868 869
	if (arg->max_background) {
		fc->max_background = arg->max_background;

		if (!cap_sys_admin && fc->max_background > max_user_bgreq)
			fc->max_background = max_user_bgreq;
	}
	if (arg->congestion_threshold) {
		fc->congestion_threshold = arg->congestion_threshold;

		if (!cap_sys_admin &&
		    fc->congestion_threshold > max_user_congthresh)
			fc->congestion_threshold = max_user_congthresh;
	}
K
Kirill Tkhai 已提交
870
	spin_unlock(&fc->bg_lock);
871 872
}

873 874 875 876 877 878 879
static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
{
	struct fuse_init_out *arg = &req->misc.init_out;

	if (req->out.h.error || arg->major != FUSE_KERNEL_VERSION)
		fc->conn_error = 1;
	else {
880 881
		unsigned long ra_pages;

882 883
		process_init_limits(fc, arg);

884
		if (arg->minor >= 6) {
885
			ra_pages = arg->max_readahead / PAGE_SIZE;
886 887
			if (arg->flags & FUSE_ASYNC_READ)
				fc->async_read = 1;
888 889
			if (!(arg->flags & FUSE_POSIX_LOCKS))
				fc->no_lock = 1;
M
Miklos Szeredi 已提交
890 891 892
			if (arg->minor >= 17) {
				if (!(arg->flags & FUSE_FLOCK_LOCKS))
					fc->no_flock = 1;
M
Miklos Szeredi 已提交
893 894 895
			} else {
				if (!(arg->flags & FUSE_POSIX_LOCKS))
					fc->no_flock = 1;
M
Miklos Szeredi 已提交
896
			}
897 898
			if (arg->flags & FUSE_ATOMIC_O_TRUNC)
				fc->atomic_o_trunc = 1;
899 900 901 902 903
			if (arg->minor >= 9) {
				/* LOOKUP has dependency on proto version */
				if (arg->flags & FUSE_EXPORT_SUPPORT)
					fc->export_support = 1;
			}
904 905
			if (arg->flags & FUSE_BIG_WRITES)
				fc->big_writes = 1;
906 907
			if (arg->flags & FUSE_DONT_MASK)
				fc->dont_mask = 1;
908 909
			if (arg->flags & FUSE_AUTO_INVAL_DATA)
				fc->auto_inval_data = 1;
910 911
			else if (arg->flags & FUSE_EXPLICIT_INVAL_DATA)
				fc->explicit_inval_data = 1;
912
			if (arg->flags & FUSE_DO_READDIRPLUS) {
913
				fc->do_readdirplus = 1;
914 915 916
				if (arg->flags & FUSE_READDIRPLUS_AUTO)
					fc->readdirplus_auto = 1;
			}
917 918
			if (arg->flags & FUSE_ASYNC_DIO)
				fc->async_dio = 1;
P
Pavel Emelyanov 已提交
919 920
			if (arg->flags & FUSE_WRITEBACK_CACHE)
				fc->writeback_cache = 1;
921 922
			if (arg->flags & FUSE_PARALLEL_DIROPS)
				fc->parallel_dirops = 1;
923 924
			if (arg->flags & FUSE_HANDLE_KILLPRIV)
				fc->handle_killpriv = 1;
925 926
			if (arg->time_gran && arg->time_gran <= 1000000000)
				fc->sb->s_time_gran = arg->time_gran;
S
Seth Forshee 已提交
927
			if ((arg->flags & FUSE_POSIX_ACL)) {
M
Miklos Szeredi 已提交
928
				fc->default_permissions = 1;
S
Seth Forshee 已提交
929 930 931
				fc->posix_acl = 1;
				fc->sb->s_xattr = fuse_acl_xattr_handlers;
			}
D
Dan Schatzberg 已提交
932 933
			if (arg->flags & FUSE_CACHE_SYMLINKS)
				fc->cache_symlinks = 1;
934 935
			if (arg->flags & FUSE_ABORT_ERROR)
				fc->abort_err = 1;
936 937 938 939 940
			if (arg->flags & FUSE_MAX_PAGES) {
				fc->max_pages =
					min_t(unsigned int, FUSE_MAX_MAX_PAGES,
					max_t(unsigned int, arg->max_pages, 1));
			}
941
		} else {
942
			ra_pages = fc->max_read / PAGE_SIZE;
943
			fc->no_lock = 1;
M
Miklos Szeredi 已提交
944
			fc->no_flock = 1;
945
		}
946

947 948
		fc->sb->s_bdi->ra_pages =
				min(fc->sb->s_bdi->ra_pages, ra_pages);
949 950
		fc->minor = arg->minor;
		fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
951
		fc->max_write = max_t(unsigned, 4096, fc->max_write);
952
		fc->conn_init = 1;
953
	}
954
	fuse_set_initialized(fc);
955
	wake_up_all(&fc->blocked_waitq);
956 957
}

958
static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
959 960
{
	struct fuse_init_in *arg = &req->misc.init_in;
M
Miklos Szeredi 已提交
961

962 963
	arg->major = FUSE_KERNEL_VERSION;
	arg->minor = FUSE_KERNEL_MINOR_VERSION;
964
	arg->max_readahead = fc->sb->s_bdi->ra_pages * PAGE_SIZE;
965
	arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
M
Miklos Szeredi 已提交
966
		FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
M
Miklos Szeredi 已提交
967
		FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
968
		FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
P
Pavel Emelyanov 已提交
969
		FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
970
		FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
971
		FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |
972
		FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS |
973
		FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA;
974 975 976 977 978
	req->in.h.opcode = FUSE_INIT;
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(*arg);
	req->in.args[0].value = arg;
	req->out.numargs = 1;
D
Daniel Mack 已提交
979
	/* Variable length argument used for backward compatibility
980 981 982 983 984 985
	   with interface version < 7.5.  Rest of init_out is zeroed
	   by do_get_request(), so a short reply is not a problem */
	req->out.argvar = 1;
	req->out.args[0].size = sizeof(struct fuse_init_out);
	req->out.args[0].value = &req->misc.init_out;
	req->end = process_init_reply;
986
	fuse_request_send_background(fc, req);
987 988
}

T
Tejun Heo 已提交
989 990
static void fuse_free_conn(struct fuse_conn *fc)
{
991
	WARN_ON(!list_empty(&fc->devices));
A
Al Viro 已提交
992
	kfree_rcu(fc, rcu);
T
Tejun Heo 已提交
993 994
}

995 996 997
static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
{
	int err;
998
	char *suffix = "";
999

1000
	if (sb->s_bdev) {
1001
		suffix = "-fuseblk";
1002 1003 1004 1005 1006 1007 1008
		/*
		 * sb->s_bdi points to blkdev's bdi however we want to redirect
		 * it to our private bdi...
		 */
		bdi_put(sb->s_bdi);
		sb->s_bdi = &noop_backing_dev_info;
	}
1009 1010
	err = super_setup_bdi_name(sb, "%u:%u%s", MAJOR(fc->dev),
				   MINOR(fc->dev), suffix);
1011 1012 1013
	if (err)
		return err;

1014
	sb->s_bdi->ra_pages = VM_READAHEAD_PAGES;
1015 1016
	/* fuse does it's own writeback accounting */
	sb->s_bdi->capabilities = BDI_CAP_NO_ACCT_WB | BDI_CAP_STRICTLIMIT;
1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029

	/*
	 * For a single fuse filesystem use max 1% of dirty +
	 * writeback threshold.
	 *
	 * This gives about 1M of write buffer for memory maps on a
	 * machine with 1G and 10% dirty_ratio, which should be more
	 * than enough.
	 *
	 * Privileged users can raise it by writing to
	 *
	 *    /sys/class/bdi/<bdi>/max_ratio
	 */
1030
	bdi_set_max_ratio(sb->s_bdi, 1);
1031 1032 1033 1034

	return 0;
}

1035 1036 1037
struct fuse_dev *fuse_dev_alloc(struct fuse_conn *fc)
{
	struct fuse_dev *fud;
1038
	struct list_head *pq;
1039 1040

	fud = kzalloc(sizeof(struct fuse_dev), GFP_KERNEL);
1041 1042
	if (!fud)
		return NULL;
1043

1044 1045 1046 1047
	pq = kcalloc(FUSE_PQ_HASH_SIZE, sizeof(struct list_head), GFP_KERNEL);
	if (!pq) {
		kfree(fud);
		return NULL;
1048 1049
	}

1050 1051 1052 1053 1054 1055 1056 1057
	fud->pq.processing = pq;
	fud->fc = fuse_conn_get(fc);
	fuse_pqueue_init(&fud->pq);

	spin_lock(&fc->lock);
	list_add_tail(&fud->entry, &fc->devices);
	spin_unlock(&fc->lock);

1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072
	return fud;
}
EXPORT_SYMBOL_GPL(fuse_dev_alloc);

/*
 * Free @fud.  If it is attached to a connection, unlink it from the
 * connection's device list under fc->lock and drop the connection
 * reference it holds.  The processing-queue array and the device itself
 * are then freed.
 */
void fuse_dev_free(struct fuse_dev *fud)
{
	struct fuse_conn *conn = fud->fc;

	if (conn) {
		spin_lock(&conn->lock);
		list_del(&fud->entry);
		spin_unlock(&conn->lock);

		fuse_conn_put(conn);
	}

	kfree(fud->pq.processing);
	kfree(fud);
}
EXPORT_SYMBOL_GPL(fuse_dev_free);

M
Miklos Szeredi 已提交
1078 1079
static int fuse_fill_super(struct super_block *sb, void *data, int silent)
{
1080
	struct fuse_dev *fud;
M
Miklos Szeredi 已提交
1081 1082 1083 1084
	struct fuse_conn *fc;
	struct inode *root;
	struct fuse_mount_data d;
	struct file *file;
1085
	struct dentry *root_dentry;
1086
	struct fuse_req *init_req;
M
Miklos Szeredi 已提交
1087
	int err;
M
Miklos Szeredi 已提交
1088
	int is_bdev = sb->s_bdev != NULL;
M
Miklos Szeredi 已提交
1089

1090
	err = -EINVAL;
1091
	if (sb->s_flags & SB_MANDLOCK)
1092
		goto err;
1093

1094
	sb->s_flags &= ~(SB_NOSEC | SB_I_VERSION);
A
Al Viro 已提交
1095

1096
	if (!parse_fuse_opt(data, &d, is_bdev, sb->s_user_ns))
1097
		goto err;
M
Miklos Szeredi 已提交
1098

M
Miklos Szeredi 已提交
1099
	if (is_bdev) {
1100
#ifdef CONFIG_BLOCK
1101
		err = -EINVAL;
M
Miklos Szeredi 已提交
1102
		if (!sb_set_blocksize(sb, d.blksize))
1103
			goto err;
1104
#endif
M
Miklos Szeredi 已提交
1105
	} else {
1106 1107
		sb->s_blocksize = PAGE_SIZE;
		sb->s_blocksize_bits = PAGE_SHIFT;
M
Miklos Szeredi 已提交
1108
	}
M
Miklos Szeredi 已提交
1109 1110
	sb->s_magic = FUSE_SUPER_MAGIC;
	sb->s_op = &fuse_super_operations;
S
Seth Forshee 已提交
1111
	sb->s_xattr = fuse_xattr_handlers;
M
Miklos Szeredi 已提交
1112
	sb->s_maxbytes = MAX_LFS_FILESIZE;
1113
	sb->s_time_gran = 1;
M
Miklos Szeredi 已提交
1114
	sb->s_export_op = &fuse_export_operations;
1115 1116 1117
	sb->s_iflags |= SB_I_IMA_UNVERIFIABLE_SIGNATURE;
	if (sb->s_user_ns != &init_user_ns)
		sb->s_iflags |= SB_I_UNTRUSTED_MOUNTER;
M
Miklos Szeredi 已提交
1118 1119

	file = fget(d.fd);
1120
	err = -EINVAL;
M
Miklos Szeredi 已提交
1121
	if (!file)
1122
		goto err;
M
Miklos Szeredi 已提交
1123

1124 1125 1126 1127 1128 1129
	/*
	 * Require mount to happen from the same user namespace which
	 * opened /dev/fuse to prevent potential attacks.
	 */
	if (file->f_op != &fuse_dev_operations ||
	    file->f_cred->user_ns != sb->s_user_ns)
1130
		goto err_fput;
M
Miklos Szeredi 已提交
1131

1132 1133 1134 1135 1136 1137 1138
	/*
	 * If we are not in the initial user namespace posix
	 * acls must be translated.
	 */
	if (sb->s_user_ns != &init_user_ns)
		sb->s_xattr = fuse_no_acl_xattr_handlers;

1139
	fc = kmalloc(sizeof(*fc), GFP_KERNEL);
1140 1141 1142
	err = -ENOMEM;
	if (!fc)
		goto err_fput;
M
Miklos Szeredi 已提交
1143

1144
	fuse_conn_init(fc, sb->s_user_ns);
1145
	fc->release = fuse_free_conn;
1146

1147 1148 1149 1150
	fud = fuse_dev_alloc(fc);
	if (!fud)
		goto err_put_conn;

1151
	fc->dev = sb->s_dev;
J
John Muir 已提交
1152
	fc->sb = sb;
1153 1154
	err = fuse_bdi_init(fc, sb);
	if (err)
1155
		goto err_dev_free;
1156

1157
	/* Handle umasking inside the fuse code */
1158
	if (sb->s_flags & SB_POSIXACL)
1159
		fc->dont_mask = 1;
1160
	sb->s_flags |= SB_POSIXACL;
1161

M
Miklos Szeredi 已提交
1162 1163
	fc->default_permissions = d.default_permissions;
	fc->allow_other = d.allow_other;
M
Miklos Szeredi 已提交
1164
	fc->user_id = d.user_id;
1165
	fc->group_id = d.group_id;
1166
	fc->max_read = max_t(unsigned, 4096, d.max_read);
M
Miklos Szeredi 已提交
1167

1168 1169 1170
	/* Used by get_root_inode() */
	sb->s_fs_info = fc;

M
Miklos Szeredi 已提交
1171
	err = -ENOMEM;
1172
	root = fuse_get_root_inode(sb, d.rootmode);
1173
	sb->s_d_op = &fuse_root_dentry_operations;
1174 1175
	root_dentry = d_make_root(root);
	if (!root_dentry)
1176
		goto err_dev_free;
1177
	/* Root dentry doesn't have .d_revalidate */
A
Al Viro 已提交
1178
	sb->s_d_op = &fuse_dentry_operations;
1179

1180
	init_req = fuse_request_alloc(0);
1181 1182
	if (!init_req)
		goto err_put_root;
M
Miklos Szeredi 已提交
1183
	__set_bit(FR_BACKGROUND, &init_req->flags);
1184

1185
	if (is_bdev) {
1186
		fc->destroy_req = fuse_request_alloc(0);
1187
		if (!fc->destroy_req)
J
Julia Lawall 已提交
1188
			goto err_free_init_req;
1189 1190
	}

1191
	mutex_lock(&fuse_mutex);
1192 1193
	err = -EINVAL;
	if (file->private_data)
1194
		goto err_unlock;
1195

1196 1197 1198 1199 1200
	err = fuse_ctl_add_conn(fc);
	if (err)
		goto err_unlock;

	list_add_tail(&fc->entry, &fuse_conn_list);
1201
	sb->s_root = root_dentry;
1202
	file->private_data = fud;
1203
	mutex_unlock(&fuse_mutex);
M
Miklos Szeredi 已提交
1204 1205 1206 1207 1208 1209
	/*
	 * atomic_dec_and_test() in fput() provides the necessary
	 * memory barrier for file->private_data to be visible on all
	 * CPUs after this
	 */
	fput(file);
1210

1211
	fuse_send_init(fc, init_req);
1212

M
Miklos Szeredi 已提交
1213 1214
	return 0;

1215 1216
 err_unlock:
	mutex_unlock(&fuse_mutex);
J
Julia Lawall 已提交
1217
 err_free_init_req:
1218
	fuse_request_free(init_req);
1219 1220
 err_put_root:
	dput(root_dentry);
1221 1222
 err_dev_free:
	fuse_dev_free(fud);
1223
 err_put_conn:
1224
	fuse_conn_put(fc);
1225
	sb->s_fs_info = NULL;
1226 1227 1228
 err_fput:
	fput(file);
 err:
M
Miklos Szeredi 已提交
1229 1230 1231
	return err;
}

A
Al Viro 已提交
1232
static struct dentry *fuse_mount(struct file_system_type *fs_type,
1233
		       int flags, const char *dev_name,
A
Al Viro 已提交
1234
		       void *raw_data)
M
Miklos Szeredi 已提交
1235
{
A
Al Viro 已提交
1236
	return mount_nodev(fs_type, flags, raw_data, fuse_fill_super);
M
Miklos Szeredi 已提交
1237 1238
}

1239
static void fuse_sb_destroy(struct super_block *sb)
J
John Muir 已提交
1240 1241 1242 1243
{
	struct fuse_conn *fc = get_fuse_conn_super(sb);

	if (fc) {
1244 1245
		fuse_send_destroy(fc);

M
Miklos Szeredi 已提交
1246
		fuse_abort_conn(fc);
1247 1248
		fuse_wait_aborted(fc);

J
John Muir 已提交
1249 1250 1251 1252
		down_write(&fc->killsb);
		fc->sb = NULL;
		up_write(&fc->killsb);
	}
1253
}
J
John Muir 已提交
1254

1255 1256 1257
/*
 * .kill_sb callback of the "fuse" filesystem type: tear down the fuse
 * side first, then let kill_anon_super() finish the job.
 */
static void fuse_kill_sb_anon(struct super_block *sb)
{
	fuse_sb_destroy(sb);
	kill_anon_super(sb);
}

1261 1262 1263
static struct file_system_type fuse_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "fuse",
1264
	.fs_flags	= FS_HAS_SUBTYPE | FS_USERNS_MOUNT,
A
Al Viro 已提交
1265
	.mount		= fuse_mount,
J
John Muir 已提交
1266
	.kill_sb	= fuse_kill_sb_anon,
1267
};
1268
MODULE_ALIAS_FS("fuse");
1269 1270

#ifdef CONFIG_BLOCK
A
Al Viro 已提交
1271
static struct dentry *fuse_mount_blk(struct file_system_type *fs_type,
1272
			   int flags, const char *dev_name,
A
Al Viro 已提交
1273
			   void *raw_data)
1274
{
A
Al Viro 已提交
1275
	return mount_bdev(fs_type, flags, dev_name, raw_data, fuse_fill_super);
1276 1277
}

J
John Muir 已提交
1278 1279
/*
 * .kill_sb callback of the "fuseblk" filesystem type: tear down the fuse
 * side first, then let kill_block_super() finish the job.
 */
static void fuse_kill_sb_blk(struct super_block *sb)
{
	fuse_sb_destroy(sb);
	kill_block_super(sb);
}

1284 1285 1286
static struct file_system_type fuseblk_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "fuseblk",
A
Al Viro 已提交
1287
	.mount		= fuse_mount_blk,
J
John Muir 已提交
1288
	.kill_sb	= fuse_kill_sb_blk,
A
Alexey Dobriyan 已提交
1289
	.fs_flags	= FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
1290
};
1291
MODULE_ALIAS_FS("fuseblk");
1292

1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312
/* Register the "fuseblk" filesystem type; returns register_filesystem()'s result. */
static inline int register_fuseblk(void)
{
	return register_filesystem(&fuseblk_fs_type);
}

/* Unregister the "fuseblk" filesystem type. */
static inline void unregister_fuseblk(void)
{
	unregister_filesystem(&fuseblk_fs_type);
}
#else
static inline int register_fuseblk(void)
{
	return 0;
}

/* Without CONFIG_BLOCK there is nothing to unregister. */
static inline void unregister_fuseblk(void)
{
}
#endif

1313
/*
 * Constructor passed to kmem_cache_create() for the fuse inode cache.
 * @foo is handed to inode_init_once() as a struct inode pointer.
 */
static void fuse_inode_init_once(void *foo)
{
	inode_init_once(foo);
}

/*
 * Create the fuse inode slab cache and register the "fuseblk" and "fuse"
 * filesystem types.
 *
 * Returns 0 on success.  On failure, undoes whatever was already set up
 * and returns -ENOMEM (cache creation) or the registration error.
 */
static int __init fuse_fs_init(void)
{
	int ret;

	fuse_inode_cachep = kmem_cache_create("fuse_inode",
			sizeof(struct fuse_inode), 0,
			SLAB_HWCACHE_ALIGN|SLAB_ACCOUNT|SLAB_RECLAIM_ACCOUNT,
			fuse_inode_init_once);
	if (!fuse_inode_cachep)
		return -ENOMEM;

	ret = register_fuseblk();
	if (ret)
		goto out_destroy_cache;

	ret = register_filesystem(&fuse_fs_type);
	if (ret)
		goto out_unregister_blk;

	return 0;

 out_unregister_blk:
	unregister_fuseblk();
 out_destroy_cache:
	kmem_cache_destroy(fuse_inode_cachep);
	return ret;
}

static void fuse_fs_cleanup(void)
{
	unregister_filesystem(&fuse_fs_type);
1353
	unregister_fuseblk();
1354 1355 1356 1357 1358 1359

	/*
	 * Make sure all delayed rcu free inodes are flushed before we
	 * destroy cache.
	 */
	rcu_barrier();
M
Miklos Szeredi 已提交
1360 1361 1362
	kmem_cache_destroy(fuse_inode_cachep);
}

1363 1364
static struct kobject *fuse_kobj;

1365 1366 1367 1368
static int fuse_sysfs_init(void)
{
	int err;

1369
	fuse_kobj = kobject_create_and_add("fuse", fs_kobj);
1370 1371
	if (!fuse_kobj) {
		err = -ENOMEM;
1372
		goto out_err;
1373
	}
1374

1375 1376
	err = sysfs_create_mount_point(fuse_kobj, "connections");
	if (err)
1377 1378 1379 1380 1381
		goto out_fuse_unregister;

	return 0;

 out_fuse_unregister:
1382
	kobject_put(fuse_kobj);
1383 1384 1385 1386 1387 1388
 out_err:
	return err;
}

static void fuse_sysfs_cleanup(void)
{
1389
	sysfs_remove_mount_point(fuse_kobj, "connections");
1390
	kobject_put(fuse_kobj);
1391 1392
}

M
Miklos Szeredi 已提交
1393 1394 1395 1396
/*
 * Module entry point.  Initializes, in order, the filesystem layer, the
 * device layer, the sysfs entries and the control filesystem, then
 * sanitizes the two global limit module parameters.
 *
 * Returns 0 on success.  If a step fails, the steps already completed
 * are undone in reverse order and that step's error is returned.
 */
static int __init fuse_init(void)
{
	int err;

	pr_info("init (API version %i.%i)\n",
		FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION);

	INIT_LIST_HEAD(&fuse_conn_list);

	err = fuse_fs_init();
	if (err)
		return err;

	err = fuse_dev_init();
	if (err)
		goto out_fs_cleanup;

	err = fuse_sysfs_init();
	if (err)
		goto out_dev_cleanup;

	err = fuse_ctl_init();
	if (err)
		goto out_sysfs_cleanup;

	sanitize_global_limit(&max_user_bgreq);
	sanitize_global_limit(&max_user_congthresh);

	return 0;

 out_sysfs_cleanup:
	fuse_sysfs_cleanup();
 out_dev_cleanup:
	fuse_dev_cleanup();
 out_fs_cleanup:
	fuse_fs_cleanup();
	return err;
}

/*
 * Module exit point: tear down the control filesystem, the sysfs
 * entries, the filesystem layer and the device layer, in that order.
 */
static void __exit fuse_exit(void)
{
	pr_debug("exit\n");

	fuse_ctl_cleanup();
	fuse_sysfs_cleanup();
	fuse_fs_cleanup();
	fuse_dev_cleanup();
}

/* Hook fuse_init()/fuse_exit() up as the module's load and unload handlers */
module_init(fuse_init);
module_exit(fuse_exit);