inode.c 26.7 KB
Newer Older
M
Miklos Szeredi 已提交
1 2
/*
  FUSE: Filesystem in Userspace
M
Miklos Szeredi 已提交
3
  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>
M
Miklos Szeredi 已提交
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/

#include "fuse_i.h"

#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/seq_file.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/parser.h>
#include <linux/statfs.h>
19
#include <linux/random.h>
A
Alexey Dobriyan 已提交
20
#include <linux/sched.h>
M
Miklos Szeredi 已提交
21
#include <linux/exportfs.h>
M
Miklos Szeredi 已提交
22 23 24 25 26

MODULE_AUTHOR("Miklos Szeredi <miklos@szeredi.hu>");
MODULE_DESCRIPTION("Filesystem in Userspace");
MODULE_LICENSE("GPL");

27
/* Slab cache used by fuse_alloc_inode()/fuse_destroy_inode(); set up in fuse_fs_init(). */
static struct kmem_cache *fuse_inode_cachep;
28 29
/*
 * List of connections.  Within this file, entries are added
 * (fuse_fill_super()) and removed (fuse_conn_kill()) with fuse_mutex held.
 */
struct list_head fuse_conn_list;
DEFINE_MUTEX(fuse_mutex);
M
Miklos Szeredi 已提交
30 31 32

#define FUSE_SUPER_MAGIC 0x65735546

M
Miklos Szeredi 已提交
33 34
#define FUSE_DEFAULT_BLKSIZE 512

35 36 37 38 39 40
/**
 * Default maximum number of outstanding background requests.
 * process_init_reply() may replace it with a value from the INIT reply.
 */
#define FUSE_DEFAULT_MAX_BACKGROUND 12

/**
 * Default congestion threshold: 75% of the default maximum (12 * 3 / 4 = 9).
 * process_init_reply() may replace it with a value from the INIT reply.
 */
#define FUSE_DEFAULT_CONGESTION_THRESHOLD (FUSE_DEFAULT_MAX_BACKGROUND * 3 / 4)

M
Miklos Szeredi 已提交
41 42 43 44
/*
 * Mount options as filled in by parse_fuse_opt().
 *
 * The *_present bits record which of the mandatory options (fd, rootmode,
 * user_id, group_id) were actually seen on the option string.
 */
struct fuse_mount_data {
	int fd;				/* "fd=" */
	unsigned int rootmode;		/* "rootmode=" (octal) */
	unsigned int user_id;		/* "user_id=" */
	unsigned int group_id;		/* "group_id=" */
	unsigned int fd_present:1;
	unsigned int rootmode_present:1;
	unsigned int user_id_present:1;
	unsigned int group_id_present:1;
	unsigned int flags;		/* FUSE_DEFAULT_PERMISSIONS | FUSE_ALLOW_OTHER */
	unsigned int max_read;		/* "max_read=", ~0 when not given */
	unsigned int blksize;		/* "blksize=", FUSE_DEFAULT_BLKSIZE when not given */
};

static struct inode *fuse_alloc_inode(struct super_block *sb)
{
	struct inode *inode;
	struct fuse_inode *fi;

60
	inode = kmem_cache_alloc(fuse_inode_cachep, GFP_KERNEL);
M
Miklos Szeredi 已提交
61 62 63 64
	if (!inode)
		return NULL;

	fi = get_fuse_inode(inode);
M
Miklos Szeredi 已提交
65
	fi->i_time = 0;
M
Miklos Szeredi 已提交
66
	fi->nodeid = 0;
67
	fi->nlookup = 0;
68
	fi->attr_version = 0;
M
Miklos Szeredi 已提交
69
	fi->writectr = 0;
70
	INIT_LIST_HEAD(&fi->write_files);
M
Miklos Szeredi 已提交
71 72 73
	INIT_LIST_HEAD(&fi->queued_writes);
	INIT_LIST_HEAD(&fi->writepages);
	init_waitqueue_head(&fi->page_waitq);
74 75 76 77 78
	fi->forget_req = fuse_request_alloc();
	if (!fi->forget_req) {
		kmem_cache_free(fuse_inode_cachep, inode);
		return NULL;
	}
M
Miklos Szeredi 已提交
79 80 81 82 83 84

	return inode;
}

static void fuse_destroy_inode(struct inode *inode)
{
85
	struct fuse_inode *fi = get_fuse_inode(inode);
86
	BUG_ON(!list_empty(&fi->write_files));
M
Miklos Szeredi 已提交
87
	BUG_ON(!list_empty(&fi->queued_writes));
88 89
	if (fi->forget_req)
		fuse_request_free(fi->forget_req);
M
Miklos Szeredi 已提交
90 91 92
	kmem_cache_free(fuse_inode_cachep, inode);
}

93
/*
 * Fill @req in as a FUSE_FORGET for @nodeid carrying @nlookup and send
 * it on @fc without waiting for a reply.
 */
void fuse_send_forget(struct fuse_conn *fc, struct fuse_req *req,
		      u64 nodeid, u64 nlookup)
{
	struct fuse_forget_in *forget = &req->misc.forget_in;

	forget->nlookup = nlookup;

	req->in.h.opcode = FUSE_FORGET;
	req->in.h.nodeid = nodeid;

	/* Single input argument: the forget payload embedded in the request. */
	req->in.numargs = 1;
	req->in.args[0].value = forget;
	req->in.args[0].size = sizeof(*forget);

	fuse_request_send_noreply(fc, req);
}

M
Miklos Szeredi 已提交
106 107
static void fuse_clear_inode(struct inode *inode)
{
M
Miklos Szeredi 已提交
108 109
	if (inode->i_sb->s_flags & MS_ACTIVE) {
		struct fuse_conn *fc = get_fuse_conn(inode);
110
		struct fuse_inode *fi = get_fuse_inode(inode);
111
		fuse_send_forget(fc, fi->forget_req, fi->nodeid, fi->nlookup);
112 113
		fi->forget_req = NULL;
	}
M
Miklos Szeredi 已提交
114 115
}

116 117 118 119 120 121 122 123
/*
 * ->remount_fs: the only check made is that mandatory locking
 * (MS_MANDLOCK) is not being requested.
 *
 * Returns -EINVAL if MS_MANDLOCK is set in *flags, 0 otherwise.
 */
static int fuse_remount_fs(struct super_block *sb, int *flags, char *data)
{
	return (*flags & MS_MANDLOCK) ? -EINVAL : 0;
}

M
Miklos Szeredi 已提交
124
/*
 * Drop cached pages of @mapping beyond @offset.
 *
 * The unmap / truncate / unmap sequence follows vmtruncate(); the
 * order of the three calls is kept as-is.
 */
void fuse_truncate(struct address_space *mapping, loff_t offset)
{
	/* See vmtruncate() */
	loff_t holebegin = offset + PAGE_SIZE - 1;

	unmap_mapping_range(mapping, holebegin, 0, 1);
	truncate_inode_pages(mapping, offset);
	unmap_mapping_range(mapping, holebegin, 0, 1);
}

M
Miklos Szeredi 已提交
132 133
/*
 * Copy everything except the size from @attr into @inode.
 *
 * Bumps the connection-wide attribute version and stamps the inode with
 * it, records @attr_valid in fi->i_time, and keeps the file type bits
 * of i_mode while taking the permission bits from @attr.
 */
void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
				   u64 attr_valid)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);

	fc->attr_version++;
	fi->attr_version = fc->attr_version;
	fi->i_time = attr_valid;

	inode->i_ino     = attr->ino;
	inode->i_mode    = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
	inode->i_nlink   = attr->nlink;
	inode->i_uid     = attr->uid;
	inode->i_gid     = attr->gid;
	inode->i_blocks  = attr->blocks;

	inode->i_atime.tv_sec   = attr->atime;
	inode->i_atime.tv_nsec  = attr->atimensec;

	inode->i_mtime.tv_sec   = attr->mtime;
	inode->i_mtime.tv_nsec  = attr->mtimensec;

	inode->i_ctime.tv_sec   = attr->ctime;
	inode->i_ctime.tv_nsec  = attr->ctimensec;

	/* Fall back to the superblock's block size if none was reported. */
	inode->i_blkbits = attr->blksize != 0 ?
		ilog2(attr->blksize) : inode->i_sb->s_blocksize_bits;

	/*
	 * Don't set the sticky bit in i_mode, unless we want the VFS
	 * to check permissions.  This prevents failures due to the
	 * check in may_delete().  The unmodified mode is preserved in
	 * fi->orig_i_mode.
	 */
	fi->orig_i_mode = inode->i_mode;
	if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS))
		inode->i_mode &= ~S_ISVTX;
}

/*
 * Update @inode, including its size, from @attr.
 *
 * If @attr_version is non-zero and the inode already carries a newer
 * attribute version, the update is dropped.  Attribute and size updates
 * are done under fc->lock; for regular files whose size changed, the
 * page cache is then truncated (on shrink) and invalidated.
 */
void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
			    u64 attr_valid, u64 attr_version)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	loff_t oldsize;

	spin_lock(&fc->lock);
	if (attr_version != 0 && fi->attr_version > attr_version) {
		/* Stale attributes: something newer is already in place. */
		spin_unlock(&fc->lock);
		return;
	}

	fuse_change_attributes_common(inode, attr, attr_valid);

	oldsize = inode->i_size;
	i_size_write(inode, attr->size);
	spin_unlock(&fc->lock);

	if (!S_ISREG(inode->i_mode) || oldsize == attr->size)
		return;

	if (attr->size < oldsize)
		fuse_truncate(inode->i_mapping, attr->size);
	invalidate_inode_pages2(inode->i_mapping);
}

static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr)
{
	inode->i_mode = attr->mode & S_IFMT;
M
Miklos Szeredi 已提交
198
	inode->i_size = attr->size;
199 200
	if (S_ISREG(inode->i_mode)) {
		fuse_init_common(inode);
M
Miklos Szeredi 已提交
201
		fuse_init_file_inode(inode);
202 203 204 205 206 207 208 209 210
	} else if (S_ISDIR(inode->i_mode))
		fuse_init_dir(inode);
	else if (S_ISLNK(inode->i_mode))
		fuse_init_symlink(inode);
	else if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode) ||
		 S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
		fuse_init_common(inode);
		init_special_inode(inode, inode->i_mode,
				   new_decode_dev(attr->rdev));
211 212
	} else
		BUG();
M
Miklos Szeredi 已提交
213 214
}

J
John Muir 已提交
215
int fuse_inode_eq(struct inode *inode, void *_nodeidp)
M
Miklos Szeredi 已提交
216
{
M
Miklos Szeredi 已提交
217
	u64 nodeid = *(u64 *) _nodeidp;
M
Miklos Szeredi 已提交
218 219 220 221 222 223 224 225
	if (get_node_id(inode) == nodeid)
		return 1;
	else
		return 0;
}

static int fuse_inode_set(struct inode *inode, void *_nodeidp)
{
M
Miklos Szeredi 已提交
226
	u64 nodeid = *(u64 *) _nodeidp;
M
Miklos Szeredi 已提交
227 228 229 230
	get_fuse_inode(inode)->nodeid = nodeid;
	return 0;
}

M
Miklos Szeredi 已提交
231
/*
 * Look up or create the inode for @nodeid on @sb and refresh its
 * attributes from @attr.
 *
 * A new inode is initialised from @attr and @generation.  An existing
 * inode whose file type no longer matches @attr is marked bad and
 * dropped, and the lookup is repeated.  On success the inode's nlookup
 * count is incremented under fc->lock.
 *
 * Returns the inode, or NULL if iget5_locked() fails.
 */
struct inode *fuse_iget(struct super_block *sb, u64 nodeid,
			int generation, struct fuse_attr *attr,
			u64 attr_valid, u64 attr_version)
{
	struct fuse_conn *fc = get_fuse_conn_super(sb);
	struct fuse_inode *fi;
	struct inode *inode;

	for (;;) {
		inode = iget5_locked(sb, nodeid, fuse_inode_eq,
				     fuse_inode_set, &nodeid);
		if (!inode)
			return NULL;

		if (inode->i_state & I_NEW) {
			inode->i_flags |= S_NOATIME|S_NOCMTIME;
			inode->i_generation = generation;
			inode->i_data.backing_dev_info = &fc->bdi;
			fuse_init_inode(inode, attr);
			unlock_new_inode(inode);
			break;
		}

		if (!((inode->i_mode ^ attr->mode) & S_IFMT))
			break;

		/* Inode has changed type, any I/O on the old should fail */
		make_bad_inode(inode);
		iput(inode);
	}

	fi = get_fuse_inode(inode);
	spin_lock(&fc->lock);
	fi->nlookup++;
	spin_unlock(&fc->lock);

	fuse_change_attributes(inode, attr, attr_valid, attr_version);

	return inode;
}

J
John Muir 已提交
266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290
/*
 * Invalidate the cached attributes of inode @nodeid and, when @offset
 * is non-negative, the cached pages covering [@offset, @offset + @len).
 * A @len <= 0 extends the page range to the largest page index.
 *
 * Returns 0 on success, -ENOENT if no such inode is currently cached.
 */
int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
			     loff_t offset, loff_t len)
{
	struct inode *inode = ilookup5(sb, nodeid, fuse_inode_eq, &nodeid);

	if (!inode)
		return -ENOENT;

	fuse_invalidate_attr(inode);

	if (offset >= 0) {
		pgoff_t first = offset >> PAGE_CACHE_SHIFT;
		pgoff_t last;

		if (len > 0)
			last = (offset + len - 1) >> PAGE_CACHE_SHIFT;
		else
			last = -1;

		invalidate_inode_pages2_range(inode->i_mapping, first, last);
	}

	iput(inode);
	return 0;
}

291
/* ->umount_begin: abort the connection that backs this superblock. */
static void fuse_umount_begin(struct super_block *sb)
{
	struct fuse_conn *fc = get_fuse_conn_super(sb);

	fuse_abort_conn(fc);
}

296 297 298 299 300 301 302
/*
 * Send FUSE_DESTROY using the request preallocated in fc->destroy_req.
 *
 * Nothing is sent if no such request exists or if the connection never
 * completed INIT (fc->conn_init unset).  The request is detached from
 * @fc before sending so that fuse_conn_put() will not free it again.
 */
static void fuse_send_destroy(struct fuse_conn *fc)
{
	struct fuse_req *req = fc->destroy_req;

	if (!req || !fc->conn_init)
		return;

	fc->destroy_req = NULL;
	req->in.h.opcode = FUSE_DESTROY;
	req->force = 1;
	fuse_request_send(fc, req);
	fuse_put_request(fc, req);
}

308
static void fuse_bdi_destroy(struct fuse_conn *fc)
M
Miklos Szeredi 已提交
309
{
310 311 312
	if (fc->bdi_initialized)
		bdi_destroy(&fc->bdi);
}
M
Miklos Szeredi 已提交
313

314
void fuse_conn_kill(struct fuse_conn *fc)
315
{
316
	spin_lock(&fc->lock);
317
	fc->connected = 0;
318
	fc->blocked = 0;
319
	spin_unlock(&fc->lock);
M
Miklos Szeredi 已提交
320
	/* Flush all readers on this fs */
321
	kill_fasync(&fc->fasync, SIGIO, POLL_IN);
M
Miklos Szeredi 已提交
322
	wake_up_all(&fc->waitq);
323
	wake_up_all(&fc->blocked_waitq);
324
	wake_up_all(&fc->reserved_req_waitq);
325 326 327 328
	mutex_lock(&fuse_mutex);
	list_del(&fc->entry);
	fuse_ctl_remove_conn(fc);
	mutex_unlock(&fuse_mutex);
329 330
	fuse_bdi_destroy(fc);
}
331
EXPORT_SYMBOL_GPL(fuse_conn_kill);
332 333 334 335 336 337 338

/*
 * ->put_super: send DESTROY (if applicable), kill the connection and
 * drop one reference on it.
 */
static void fuse_put_super(struct super_block *sb)
{
	struct fuse_conn *conn = get_fuse_conn_super(sb);

	fuse_send_destroy(conn);
	fuse_conn_kill(conn);
	fuse_conn_put(conn);
}

342 343 344 345
/*
 * Translate a struct fuse_kstatfs into the kernel's struct kstatfs.
 * f_type is always FUSE_SUPER_MAGIC; f_fsid is not written here.
 */
static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr)
{
	stbuf->f_type    = FUSE_SUPER_MAGIC;

	/* Block geometry and usage */
	stbuf->f_bsize   = attr->bsize;
	stbuf->f_frsize  = attr->frsize;
	stbuf->f_blocks  = attr->blocks;
	stbuf->f_bfree   = attr->bfree;
	stbuf->f_bavail  = attr->bavail;

	/* Inode counts and name length limit */
	stbuf->f_files   = attr->files;
	stbuf->f_ffree   = attr->ffree;
	stbuf->f_namelen = attr->namelen;
	/* fsid is left zero */
}

356
static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
357
{
358
	struct super_block *sb = dentry->d_sb;
359 360 361 362 363
	struct fuse_conn *fc = get_fuse_conn_super(sb);
	struct fuse_req *req;
	struct fuse_statfs_out outarg;
	int err;

M
Miklos Szeredi 已提交
364 365 366 367 368
	if (!fuse_allow_task(fc, current)) {
		buf->f_type = FUSE_SUPER_MAGIC;
		return 0;
	}

369 370 371
	req = fuse_get_req(fc);
	if (IS_ERR(req))
		return PTR_ERR(req);
372

373
	memset(&outarg, 0, sizeof(outarg));
374 375
	req->in.numargs = 0;
	req->in.h.opcode = FUSE_STATFS;
376
	req->in.h.nodeid = get_node_id(dentry->d_inode);
377
	req->out.numargs = 1;
378 379
	req->out.args[0].size =
		fc->minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(outarg);
380
	req->out.args[0].value = &outarg;
381
	fuse_request_send(fc, req);
382 383 384 385 386 387 388
	err = req->out.h.error;
	if (!err)
		convert_fuse_statfs(buf, &outarg.st);
	fuse_put_request(fc, req);
	return err;
}

M
Miklos Szeredi 已提交
389 390 391 392
/* Token ids for the mount options matched by the tokens table. */
enum {
	OPT_FD,				/* fd=%u */
	OPT_ROOTMODE,			/* rootmode=%o */
	OPT_USER_ID,			/* user_id=%u */
	OPT_GROUP_ID,			/* group_id=%u */
	OPT_DEFAULT_PERMISSIONS,	/* default_permissions */
	OPT_ALLOW_OTHER,		/* allow_other */
	OPT_MAX_READ,			/* max_read=%u */
	OPT_BLKSIZE,			/* blksize=%u */
	OPT_ERR				/* table terminator / no match */
};

401
static const match_table_t tokens = {
M
Miklos Szeredi 已提交
402 403 404
	{OPT_FD,			"fd=%u"},
	{OPT_ROOTMODE,			"rootmode=%o"},
	{OPT_USER_ID,			"user_id=%u"},
405
	{OPT_GROUP_ID,			"group_id=%u"},
M
Miklos Szeredi 已提交
406 407
	{OPT_DEFAULT_PERMISSIONS,	"default_permissions"},
	{OPT_ALLOW_OTHER,		"allow_other"},
408
	{OPT_MAX_READ,			"max_read=%u"},
M
Miklos Szeredi 已提交
409
	{OPT_BLKSIZE,			"blksize=%u"},
M
Miklos Szeredi 已提交
410 411 412
	{OPT_ERR,			NULL}
};

M
Miklos Szeredi 已提交
413
static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
M
Miklos Szeredi 已提交
414 415 416
{
	char *p;
	memset(d, 0, sizeof(struct fuse_mount_data));
417
	d->max_read = ~0;
M
Miklos Szeredi 已提交
418
	d->blksize = FUSE_DEFAULT_BLKSIZE;
M
Miklos Szeredi 已提交
419 420 421 422 423 424 425 426 427 428 429 430 431 432

	while ((p = strsep(&opt, ",")) != NULL) {
		int token;
		int value;
		substring_t args[MAX_OPT_ARGS];
		if (!*p)
			continue;

		token = match_token(p, tokens, args);
		switch (token) {
		case OPT_FD:
			if (match_int(&args[0], &value))
				return 0;
			d->fd = value;
433
			d->fd_present = 1;
M
Miklos Szeredi 已提交
434 435 436 437 438
			break;

		case OPT_ROOTMODE:
			if (match_octal(&args[0], &value))
				return 0;
439 440
			if (!fuse_valid_type(value))
				return 0;
M
Miklos Szeredi 已提交
441
			d->rootmode = value;
442
			d->rootmode_present = 1;
M
Miklos Szeredi 已提交
443 444 445 446 447 448
			break;

		case OPT_USER_ID:
			if (match_int(&args[0], &value))
				return 0;
			d->user_id = value;
449
			d->user_id_present = 1;
M
Miklos Szeredi 已提交
450 451
			break;

452 453 454 455
		case OPT_GROUP_ID:
			if (match_int(&args[0], &value))
				return 0;
			d->group_id = value;
456
			d->group_id_present = 1;
457 458
			break;

M
Miklos Szeredi 已提交
459 460 461 462 463 464 465 466
		case OPT_DEFAULT_PERMISSIONS:
			d->flags |= FUSE_DEFAULT_PERMISSIONS;
			break;

		case OPT_ALLOW_OTHER:
			d->flags |= FUSE_ALLOW_OTHER;
			break;

467 468 469 470 471 472
		case OPT_MAX_READ:
			if (match_int(&args[0], &value))
				return 0;
			d->max_read = value;
			break;

M
Miklos Szeredi 已提交
473 474 475 476 477 478
		case OPT_BLKSIZE:
			if (!is_bdev || match_int(&args[0], &value))
				return 0;
			d->blksize = value;
			break;

M
Miklos Szeredi 已提交
479 480 481 482
		default:
			return 0;
		}
	}
483 484 485

	if (!d->fd_present || !d->rootmode_present ||
	    !d->user_id_present || !d->group_id_present)
M
Miklos Szeredi 已提交
486 487 488 489 490 491 492 493 494 495
		return 0;

	return 1;
}

static int fuse_show_options(struct seq_file *m, struct vfsmount *mnt)
{
	struct fuse_conn *fc = get_fuse_conn_super(mnt->mnt_sb);

	seq_printf(m, ",user_id=%u", fc->user_id);
496
	seq_printf(m, ",group_id=%u", fc->group_id);
M
Miklos Szeredi 已提交
497 498 499 500
	if (fc->flags & FUSE_DEFAULT_PERMISSIONS)
		seq_puts(m, ",default_permissions");
	if (fc->flags & FUSE_ALLOW_OTHER)
		seq_puts(m, ",allow_other");
501 502
	if (fc->max_read != ~0)
		seq_printf(m, ",max_read=%u", fc->max_read);
M
Miklos Szeredi 已提交
503 504 505
	if (mnt->mnt_sb->s_bdev &&
	    mnt->mnt_sb->s_blocksize != FUSE_DEFAULT_BLKSIZE)
		seq_printf(m, ",blksize=%lu", mnt->mnt_sb->s_blocksize);
M
Miklos Szeredi 已提交
506 507 508
	return 0;
}

509
void fuse_conn_init(struct fuse_conn *fc)
M
Miklos Szeredi 已提交
510
{
511 512 513
	memset(fc, 0, sizeof(*fc));
	spin_lock_init(&fc->lock);
	mutex_init(&fc->inst_mutex);
J
John Muir 已提交
514
	init_rwsem(&fc->killsb);
515 516 517 518 519 520 521 522 523 524 525
	atomic_set(&fc->count, 1);
	init_waitqueue_head(&fc->waitq);
	init_waitqueue_head(&fc->blocked_waitq);
	init_waitqueue_head(&fc->reserved_req_waitq);
	INIT_LIST_HEAD(&fc->pending);
	INIT_LIST_HEAD(&fc->processing);
	INIT_LIST_HEAD(&fc->io);
	INIT_LIST_HEAD(&fc->interrupts);
	INIT_LIST_HEAD(&fc->bg_queue);
	INIT_LIST_HEAD(&fc->entry);
	atomic_set(&fc->num_waiting, 0);
526 527
	fc->max_background = FUSE_DEFAULT_MAX_BACKGROUND;
	fc->congestion_threshold = FUSE_DEFAULT_CONGESTION_THRESHOLD;
528 529 530 531 532 533
	fc->khctr = 0;
	fc->polled_files = RB_ROOT;
	fc->reqctr = 0;
	fc->blocked = 1;
	fc->attr_version = 1;
	get_random_bytes(&fc->scramble_key, sizeof(fc->scramble_key));
M
Miklos Szeredi 已提交
534
}
535
EXPORT_SYMBOL_GPL(fuse_conn_init);
M
Miklos Szeredi 已提交
536

537 538
void fuse_conn_put(struct fuse_conn *fc)
{
539
	if (atomic_dec_and_test(&fc->count)) {
540 541
		if (fc->destroy_req)
			fuse_request_free(fc->destroy_req);
542
		mutex_destroy(&fc->inst_mutex);
T
Tejun Heo 已提交
543
		fc->release(fc);
544
	}
545
}
546
EXPORT_SYMBOL_GPL(fuse_conn_put);
547 548 549 550 551 552

/* Take an additional reference on @fc; returns @fc for convenience. */
struct fuse_conn *fuse_conn_get(struct fuse_conn *fc)
{
	atomic_inc(&fc->count);

	return fc;
}
553
EXPORT_SYMBOL_GPL(fuse_conn_get);
554

555
static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode)
M
Miklos Szeredi 已提交
556 557 558 559 560 561
{
	struct fuse_attr attr;
	memset(&attr, 0, sizeof(attr));

	attr.mode = mode;
	attr.ino = FUSE_ROOT_ID;
562
	attr.nlink = 1;
563
	return fuse_iget(sb, 1, 0, &attr, 0, 0);
M
Miklos Szeredi 已提交
564 565
}

M
Miklos Szeredi 已提交
566
struct fuse_inode_handle {
M
Miklos Szeredi 已提交
567 568 569 570 571 572 573
	u64 nodeid;
	u32 generation;
};

static struct dentry *fuse_get_dentry(struct super_block *sb,
				      struct fuse_inode_handle *handle)
{
574
	struct fuse_conn *fc = get_fuse_conn_super(sb);
M
Miklos Szeredi 已提交
575 576 577 578 579 580 581 582
	struct inode *inode;
	struct dentry *entry;
	int err = -ESTALE;

	if (handle->nodeid == 0)
		goto out_err;

	inode = ilookup5(sb, handle->nodeid, fuse_inode_eq, &handle->nodeid);
583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603
	if (!inode) {
		struct fuse_entry_out outarg;
		struct qstr name;

		if (!fc->export_support)
			goto out_err;

		name.len = 1;
		name.name = ".";
		err = fuse_lookup_name(sb, handle->nodeid, &name, &outarg,
				       &inode);
		if (err && err != -ENOENT)
			goto out_err;
		if (err || !inode) {
			err = -ESTALE;
			goto out_err;
		}
		err = -EIO;
		if (get_node_id(inode) != handle->nodeid)
			goto out_iput;
	}
M
Miklos Szeredi 已提交
604 605 606 607
	err = -ESTALE;
	if (inode->i_generation != handle->generation)
		goto out_iput;

608 609
	entry = d_obtain_alias(inode);
	if (!IS_ERR(entry) && get_node_id(inode) != FUSE_ROOT_ID) {
M
Miklos Szeredi 已提交
610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686
		entry->d_op = &fuse_dentry_operations;
		fuse_invalidate_entry_cache(entry);
	}

	return entry;

 out_iput:
	iput(inode);
 out_err:
	return ERR_PTR(err);
}

/*
 * ->encode_fh: write a file handle for @dentry into @fh.
 *
 * Layout (32-bit words): node id high, node id low, generation; when
 * @connectable is set and the inode is not a directory, the same three
 * words for the parent follow.
 *
 * On success *@max_len is set to the number of words written (3 or 6)
 * and the handle type is returned (0x81 without parent, 0x82 with).
 * Returns 255 if *@max_len is too small.
 */
static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
			   int connectable)
{
	struct inode *inode = dentry->d_inode;
	bool encode_parent = connectable && !S_ISDIR(inode->i_mode);
	int needed = encode_parent ? 6 : 3;
	u64 id;
	u32 gen;

	if (*max_len < needed)
		return  255;

	id = get_fuse_inode(inode)->nodeid;
	gen = inode->i_generation;

	fh[0] = (u32)(id >> 32);
	fh[1] = (u32)(id & 0xffffffff);
	fh[2] = gen;

	if (encode_parent) {
		struct inode *parent;

		/* d_parent is sampled under d_lock. */
		spin_lock(&dentry->d_lock);
		parent = dentry->d_parent->d_inode;
		id = get_fuse_inode(parent)->nodeid;
		gen = parent->i_generation;
		spin_unlock(&dentry->d_lock);

		fh[3] = (u32)(id >> 32);
		fh[4] = (u32)(id & 0xffffffff);
		fh[5] = gen;
	}

	*max_len = needed;
	if (encode_parent)
		return 0x82;
	return 0x81;
}

static struct dentry *fuse_fh_to_dentry(struct super_block *sb,
		struct fid *fid, int fh_len, int fh_type)
{
	struct fuse_inode_handle handle;

	if ((fh_type != 0x81 && fh_type != 0x82) || fh_len < 3)
		return NULL;

	handle.nodeid = (u64) fid->raw[0] << 32;
	handle.nodeid |= (u64) fid->raw[1];
	handle.generation = fid->raw[2];
	return fuse_get_dentry(sb, &handle);
}

static struct dentry *fuse_fh_to_parent(struct super_block *sb,
		struct fid *fid, int fh_len, int fh_type)
{
	struct fuse_inode_handle parent;

	if (fh_type != 0x82 || fh_len < 6)
		return NULL;

	parent.nodeid = (u64) fid->raw[3] << 32;
	parent.nodeid |= (u64) fid->raw[4];
	parent.generation = fid->raw[5];
	return fuse_get_dentry(sb, &parent);
}

687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703
static struct dentry *fuse_get_parent(struct dentry *child)
{
	struct inode *child_inode = child->d_inode;
	struct fuse_conn *fc = get_fuse_conn(child_inode);
	struct inode *inode;
	struct dentry *parent;
	struct fuse_entry_out outarg;
	struct qstr name;
	int err;

	if (!fc->export_support)
		return ERR_PTR(-ESTALE);

	name.len = 2;
	name.name = "..";
	err = fuse_lookup_name(child_inode->i_sb, get_node_id(child_inode),
			       &name, &outarg, &inode);
704 705 706
	if (err) {
		if (err == -ENOENT)
			return ERR_PTR(-ESTALE);
707 708
		return ERR_PTR(err);
	}
709 710 711

	parent = d_obtain_alias(inode);
	if (!IS_ERR(parent) && get_node_id(inode) != FUSE_ROOT_ID) {
712 713 714 715 716 717
		parent->d_op = &fuse_dentry_operations;
		fuse_invalidate_entry_cache(parent);
	}

	return parent;
}
M
Miklos Szeredi 已提交
718 719 720 721 722

static const struct export_operations fuse_export_operations = {
	.fh_to_dentry	= fuse_fh_to_dentry,
	.fh_to_parent	= fuse_fh_to_parent,
	.encode_fh	= fuse_encode_fh,
723
	.get_parent	= fuse_get_parent,
M
Miklos Szeredi 已提交
724 725
};

726
static const struct super_operations fuse_super_operations = {
M
Miklos Szeredi 已提交
727 728 729
	.alloc_inode    = fuse_alloc_inode,
	.destroy_inode  = fuse_destroy_inode,
	.clear_inode	= fuse_clear_inode,
M
Miklos Szeredi 已提交
730
	.drop_inode	= generic_delete_inode,
731
	.remount_fs	= fuse_remount_fs,
M
Miklos Szeredi 已提交
732
	.put_super	= fuse_put_super,
733
	.umount_begin	= fuse_umount_begin,
734
	.statfs		= fuse_statfs,
M
Miklos Szeredi 已提交
735 736 737
	.show_options	= fuse_show_options,
};

738 739 740 741 742 743 744
static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
{
	struct fuse_init_out *arg = &req->misc.init_out;

	if (req->out.h.error || arg->major != FUSE_KERNEL_VERSION)
		fc->conn_error = 1;
	else {
745 746
		unsigned long ra_pages;

747 748 749 750 751 752
		if (arg->minor >= 13) {
			if (arg->max_background)
				fc->max_background = arg->max_background;
			if (arg->congestion_threshold)
				fc->congestion_threshold = arg->congestion_threshold;
		}
753 754 755 756
		if (arg->minor >= 6) {
			ra_pages = arg->max_readahead / PAGE_CACHE_SIZE;
			if (arg->flags & FUSE_ASYNC_READ)
				fc->async_read = 1;
757 758
			if (!(arg->flags & FUSE_POSIX_LOCKS))
				fc->no_lock = 1;
759 760
			if (arg->flags & FUSE_ATOMIC_O_TRUNC)
				fc->atomic_o_trunc = 1;
761 762 763 764 765
			if (arg->minor >= 9) {
				/* LOOKUP has dependency on proto version */
				if (arg->flags & FUSE_EXPORT_SUPPORT)
					fc->export_support = 1;
			}
766 767
			if (arg->flags & FUSE_BIG_WRITES)
				fc->big_writes = 1;
768 769
			if (arg->flags & FUSE_DONT_MASK)
				fc->dont_mask = 1;
770
		} else {
771
			ra_pages = fc->max_read / PAGE_CACHE_SIZE;
772 773
			fc->no_lock = 1;
		}
774 775

		fc->bdi.ra_pages = min(fc->bdi.ra_pages, ra_pages);
776 777
		fc->minor = arg->minor;
		fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
778
		fc->max_write = max_t(unsigned, 4096, fc->max_write);
779
		fc->conn_init = 1;
780
	}
781 782
	fc->blocked = 0;
	wake_up_all(&fc->blocked_waitq);
783 784
}

785
static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
786 787
{
	struct fuse_init_in *arg = &req->misc.init_in;
M
Miklos Szeredi 已提交
788

789 790
	arg->major = FUSE_KERNEL_VERSION;
	arg->minor = FUSE_KERNEL_MINOR_VERSION;
791
	arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
792
	arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
793
		FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK;
794 795 796 797 798 799 800 801 802 803 804 805
	req->in.h.opcode = FUSE_INIT;
	req->in.numargs = 1;
	req->in.args[0].size = sizeof(*arg);
	req->in.args[0].value = arg;
	req->out.numargs = 1;
	/* Variable length arguement used for backward compatibility
	   with interface version < 7.5.  Rest of init_out is zeroed
	   by do_get_request(), so a short reply is not a problem */
	req->out.argvar = 1;
	req->out.args[0].size = sizeof(struct fuse_init_out);
	req->out.args[0].value = &req->misc.init_out;
	req->end = process_init_reply;
806
	fuse_request_send_background(fc, req);
807 808
}

T
Tejun Heo 已提交
809 810 811 812 813
/*
 * fc->release callback for connections allocated with kmalloc() in
 * fuse_fill_super(); invoked by fuse_conn_put() on the final put.
 */
static void fuse_free_conn(struct fuse_conn *fc)
{
	kfree(fc);
}

814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855
/*
 * Initialise and register the backing_dev_info embedded in @fc.
 *
 * bdi_initialized is set as soon as bdi_init() succeeds, so that
 * fuse_bdi_destroy() cleans up even if registration fails afterwards.
 *
 * Returns 0 on success or the error from bdi_init() / bdi_register*().
 */
static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
{
	int err;

	fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
	fc->bdi.unplug_io_fn = default_unplug_io_fn;
	/* fuse does its own writeback accounting */
	fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;

	err = bdi_init(&fc->bdi);
	if (err)
		return err;

	fc->bdi_initialized = 1;

	/* Block-device backed mounts register under a "-fuseblk" name. */
	if (sb->s_bdev)
		err = bdi_register(&fc->bdi, NULL, "%u:%u-fuseblk",
				   MAJOR(fc->dev), MINOR(fc->dev));
	else
		err = bdi_register_dev(&fc->bdi, fc->dev);
	if (err)
		return err;

	/*
	 * For a single fuse filesystem use max 1% of dirty +
	 * writeback threshold.
	 *
	 * This gives about 1M of write buffer for memory maps on a
	 * machine with 1G and 10% dirty_ratio, which should be more
	 * than enough.
	 *
	 * Privileged users can raise it by writing to
	 *
	 *    /sys/class/bdi/<bdi>/max_ratio
	 */
	bdi_set_max_ratio(&fc->bdi, 1);

	return 0;
}

M
Miklos Szeredi 已提交
856 857 858 859 860 861
static int fuse_fill_super(struct super_block *sb, void *data, int silent)
{
	struct fuse_conn *fc;
	struct inode *root;
	struct fuse_mount_data d;
	struct file *file;
862
	struct dentry *root_dentry;
863
	struct fuse_req *init_req;
M
Miklos Szeredi 已提交
864
	int err;
M
Miklos Szeredi 已提交
865
	int is_bdev = sb->s_bdev != NULL;
M
Miklos Szeredi 已提交
866

867
	err = -EINVAL;
868
	if (sb->s_flags & MS_MANDLOCK)
869
		goto err;
870

M
Miklos Szeredi 已提交
871
	if (!parse_fuse_opt((char *) data, &d, is_bdev))
872
		goto err;
M
Miklos Szeredi 已提交
873

M
Miklos Szeredi 已提交
874
	if (is_bdev) {
875
#ifdef CONFIG_BLOCK
876
		err = -EINVAL;
M
Miklos Szeredi 已提交
877
		if (!sb_set_blocksize(sb, d.blksize))
878
			goto err;
879
#endif
M
Miklos Szeredi 已提交
880 881 882 883
	} else {
		sb->s_blocksize = PAGE_CACHE_SIZE;
		sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
	}
M
Miklos Szeredi 已提交
884 885 886
	sb->s_magic = FUSE_SUPER_MAGIC;
	sb->s_op = &fuse_super_operations;
	sb->s_maxbytes = MAX_LFS_FILESIZE;
M
Miklos Szeredi 已提交
887
	sb->s_export_op = &fuse_export_operations;
M
Miklos Szeredi 已提交
888 889

	file = fget(d.fd);
890
	err = -EINVAL;
M
Miklos Szeredi 已提交
891
	if (!file)
892
		goto err;
M
Miklos Szeredi 已提交
893

894 895
	if (file->f_op != &fuse_dev_operations)
		goto err_fput;
M
Miklos Szeredi 已提交
896

897
	fc = kmalloc(sizeof(*fc), GFP_KERNEL);
898 899 900
	err = -ENOMEM;
	if (!fc)
		goto err_fput;
M
Miklos Szeredi 已提交
901

902 903 904
	fuse_conn_init(fc);

	fc->dev = sb->s_dev;
J
John Muir 已提交
905
	fc->sb = sb;
906 907 908
	err = fuse_bdi_init(fc, sb);
	if (err)
		goto err_put_conn;
909

910 911 912 913 914
	/* Handle umasking inside the fuse code */
	if (sb->s_flags & MS_POSIXACL)
		fc->dont_mask = 1;
	sb->s_flags |= MS_POSIXACL;

T
Tejun Heo 已提交
915
	fc->release = fuse_free_conn;
M
Miklos Szeredi 已提交
916
	fc->flags = d.flags;
M
Miklos Szeredi 已提交
917
	fc->user_id = d.user_id;
918
	fc->group_id = d.group_id;
919
	fc->max_read = max_t(unsigned, 4096, d.max_read);
M
Miklos Szeredi 已提交
920

921 922 923
	/* Used by get_root_inode() */
	sb->s_fs_info = fc;

M
Miklos Szeredi 已提交
924
	err = -ENOMEM;
925
	root = fuse_get_root_inode(sb, d.rootmode);
926
	if (!root)
927
		goto err_put_conn;
M
Miklos Szeredi 已提交
928

929 930
	root_dentry = d_alloc_root(root);
	if (!root_dentry) {
M
Miklos Szeredi 已提交
931
		iput(root);
932
		goto err_put_conn;
M
Miklos Szeredi 已提交
933
	}
934

935 936 937 938
	init_req = fuse_request_alloc();
	if (!init_req)
		goto err_put_root;

939 940 941
	if (is_bdev) {
		fc->destroy_req = fuse_request_alloc();
		if (!fc->destroy_req)
J
Julia Lawall 已提交
942
			goto err_free_init_req;
943 944
	}

945
	mutex_lock(&fuse_mutex);
946 947
	err = -EINVAL;
	if (file->private_data)
948
		goto err_unlock;
949

950 951 952 953 954
	err = fuse_ctl_add_conn(fc);
	if (err)
		goto err_unlock;

	list_add_tail(&fc->entry, &fuse_conn_list);
955 956
	sb->s_root = root_dentry;
	fc->connected = 1;
957 958
	file->private_data = fuse_conn_get(fc);
	mutex_unlock(&fuse_mutex);
M
Miklos Szeredi 已提交
959 960 961 962 963 964
	/*
	 * atomic_dec_and_test() in fput() provides the necessary
	 * memory barrier for file->private_data to be visible on all
	 * CPUs after this
	 */
	fput(file);
965

966
	fuse_send_init(fc, init_req);
967

M
Miklos Szeredi 已提交
968 969
	return 0;

970 971
 err_unlock:
	mutex_unlock(&fuse_mutex);
J
Julia Lawall 已提交
972
 err_free_init_req:
973
	fuse_request_free(init_req);
974 975
 err_put_root:
	dput(root_dentry);
976
 err_put_conn:
977
	fuse_bdi_destroy(fc);
978
	fuse_conn_put(fc);
979 980 981
 err_fput:
	fput(file);
 err:
M
Miklos Szeredi 已提交
982 983 984
	return err;
}

985 986 987
static int fuse_get_sb(struct file_system_type *fs_type,
		       int flags, const char *dev_name,
		       void *raw_data, struct vfsmount *mnt)
M
Miklos Szeredi 已提交
988
{
989
	return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super, mnt);
M
Miklos Szeredi 已提交
990 991
}

J
John Muir 已提交
992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004
/*
 * ->kill_sb for "fuse": if the superblock has a connection attached,
 * clear its back pointer to the superblock while holding fc->killsb
 * for writing, then tear the superblock down with kill_anon_super().
 */
static void fuse_kill_sb_anon(struct super_block *sb)
{
	struct fuse_conn *conn = get_fuse_conn_super(sb);

	if (conn != NULL) {
		down_write(&conn->killsb);
		conn->sb = NULL;
		up_write(&conn->killsb);
	}

	kill_anon_super(sb);
}

1005 1006 1007
static struct file_system_type fuse_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "fuse",
M
Miklos Szeredi 已提交
1008
	.fs_flags	= FS_HAS_SUBTYPE,
1009
	.get_sb		= fuse_get_sb,
J
John Muir 已提交
1010
	.kill_sb	= fuse_kill_sb_anon,
1011 1012 1013
};

#ifdef CONFIG_BLOCK
1014 1015 1016 1017 1018 1019 1020 1021
/*
 * ->get_sb for "fuseblk": a superblock on block device @dev_name,
 * filled in by fuse_fill_super().
 */
static int fuse_get_sb_blk(struct file_system_type *fs_type,
			   int flags, const char *dev_name,
			   void *raw_data, struct vfsmount *mnt)
{
	int err = get_sb_bdev(fs_type, flags, dev_name, raw_data,
			      fuse_fill_super, mnt);

	return err;
}

J
John Muir 已提交
1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034
/*
 * ->kill_sb for "fuseblk": if the superblock has a connection attached,
 * clear its back pointer to the superblock while holding fc->killsb
 * for writing, then tear the superblock down with kill_block_super().
 */
static void fuse_kill_sb_blk(struct super_block *sb)
{
	struct fuse_conn *conn = get_fuse_conn_super(sb);

	if (conn != NULL) {
		down_write(&conn->killsb);
		conn->sb = NULL;
		up_write(&conn->killsb);
	}

	kill_block_super(sb);
}

1035 1036 1037 1038
static struct file_system_type fuseblk_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "fuseblk",
	.get_sb		= fuse_get_sb_blk,
J
John Muir 已提交
1039
	.kill_sb	= fuse_kill_sb_blk,
A
Alexey Dobriyan 已提交
1040
	.fs_flags	= FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
1041 1042
};

1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062
/* CONFIG_BLOCK build: register the "fuseblk" type; returns register_filesystem()'s result. */
static inline int register_fuseblk(void)
{
	return register_filesystem(&fuseblk_fs_type);
}

/* CONFIG_BLOCK build: unregister the "fuseblk" type. */
static inline void unregister_fuseblk(void)
{
	unregister_filesystem(&fuseblk_fs_type);
}
#else
/* !CONFIG_BLOCK build: there is no "fuseblk" type, so registering it trivially succeeds. */
static inline int register_fuseblk(void)
{
	return 0;
}

/* !CONFIG_BLOCK build: nothing was registered, nothing to undo. */
static inline void unregister_fuseblk(void)
{
}
#endif

1063
/*
 * Constructor passed to kmem_cache_create() for the fuse inode cache;
 * @foo is the object to initialise, treated as a struct inode.
 */
static void fuse_inode_init_once(void *foo)
{
	struct inode *vfs_inode = foo;

	inode_init_once(vfs_inode);
}

/*
 * Create the fuse inode slab cache and register the "fuse" and (when
 * CONFIG_BLOCK is enabled) "fuseblk" filesystem types.
 *
 * The cache is created before either type is registered, so a
 * registered type never exists without the cache that
 * fuse_alloc_inode() allocates from.  On failure, whatever was set up
 * is undone in reverse order.
 *
 * Returns 0 on success or a negative errno.
 */
static int __init fuse_fs_init(void)
{
	int err;

	fuse_inode_cachep = kmem_cache_create("fuse_inode",
					      sizeof(struct fuse_inode),
					      0, SLAB_HWCACHE_ALIGN,
					      fuse_inode_init_once);
	err = -ENOMEM;
	if (!fuse_inode_cachep)
		goto out;

	err = register_fuseblk();
	if (err)
		goto out_destroy_cache;

	err = register_filesystem(&fuse_fs_type);
	if (err)
		goto out_unreg_blk;

	return 0;

 out_unreg_blk:
	unregister_fuseblk();
 out_destroy_cache:
	kmem_cache_destroy(fuse_inode_cachep);
 out:
	return err;
}

static void fuse_fs_cleanup(void)
{
	unregister_filesystem(&fuse_fs_type);
1103
	unregister_fuseblk();
M
Miklos Szeredi 已提交
1104 1105 1106
	kmem_cache_destroy(fuse_inode_cachep);
}

1107 1108 1109
/*
 * kobjects created by fuse_sysfs_init(): "fuse" under fs_kobj, and
 * "connections" beneath "fuse".
 */
static struct kobject *fuse_kobj;
static struct kobject *connections_kobj;

1110 1111 1112 1113
static int fuse_sysfs_init(void)
{
	int err;

1114
	fuse_kobj = kobject_create_and_add("fuse", fs_kobj);
1115 1116
	if (!fuse_kobj) {
		err = -ENOMEM;
1117
		goto out_err;
1118
	}
1119

1120 1121 1122
	connections_kobj = kobject_create_and_add("connections", fuse_kobj);
	if (!connections_kobj) {
		err = -ENOMEM;
1123
		goto out_fuse_unregister;
1124
	}
1125 1126 1127 1128

	return 0;

 out_fuse_unregister:
1129
	kobject_put(fuse_kobj);
1130 1131 1132 1133 1134 1135
 out_err:
	return err;
}

static void fuse_sysfs_cleanup(void)
{
1136 1137
	kobject_put(connections_kobj);
	kobject_put(fuse_kobj);
1138 1139
}

M
Miklos Szeredi 已提交
1140 1141 1142 1143
/*
 * Module entry point.
 *
 * Initialises the connection list, then the filesystem, device, sysfs
 * and control-filesystem parts in that order.  If a step fails, the
 * steps already completed are undone in reverse order and the error is
 * returned.
 */
static int __init fuse_init(void)
{
	int err;

	printk(KERN_INFO "fuse init (API version %i.%i)\n",
	       FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION);

	INIT_LIST_HEAD(&fuse_conn_list);

	err = fuse_fs_init();
	if (err)
		goto fail;

	err = fuse_dev_init();
	if (err)
		goto undo_fs;

	err = fuse_sysfs_init();
	if (err)
		goto undo_dev;

	err = fuse_ctl_init();
	if (err)
		goto undo_sysfs;

	return 0;

 undo_sysfs:
	fuse_sysfs_cleanup();
 undo_dev:
	fuse_dev_cleanup();
 undo_fs:
	fuse_fs_cleanup();
 fail:
	return err;
}

/*
 * Module exit point: tear down the control filesystem, sysfs objects,
 * filesystem types / inode cache and finally the device.
 */
static void __exit fuse_exit(void)
{
	printk(KERN_DEBUG "fuse exit\n");

	fuse_ctl_cleanup();
	fuse_sysfs_cleanup();

	fuse_fs_cleanup();
	fuse_dev_cleanup();
}

/* Hook fuse_init()/fuse_exit() up as the module's load and unload handlers. */
module_init(fuse_init);
module_exit(fuse_exit);