/*
  FUSE: Filesystem in Userspace
  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/

#include "fuse_i.h"

#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/sched.h>
#include <linux/namei.h>
15
#include <linux/slab.h>
S
Seth Forshee 已提交
16
#include <linux/xattr.h>
17
#include <linux/iversion.h>
S
Seth Forshee 已提交
18
#include <linux/posix_acl.h>
19

20 21 22 23 24 25 26
static void fuse_advise_use_readdirplus(struct inode *dir)
{
	struct fuse_inode *fi = get_fuse_inode(dir);

	set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
}

M
Miklos Szeredi 已提交
27 28 29 30 31
/*
 * Per-dentry private data, hung off dentry->d_fsdata (allocated in
 * fuse_dentry_init(), freed in fuse_dentry_release()).
 */
union fuse_dentry {
	/* Written by fuse_dentry_settime(), read by fuse_dentry_time() */
	u64 time;
	/* Handed to kfree_rcu() when the dentry is released */
	struct rcu_head rcu;
};

32
/*
 * Store @time in the dentry's private data.
 *
 * As a side effect the DCACHE_OP_DELETE flag is brought in line with the
 * new value: it ends up set only when @time is zero and the connection
 * has delete_stale enabled, and cleared otherwise.
 */
static void fuse_dentry_settime(struct dentry *dentry, u64 time)
{
	struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
	bool delete = !time && fc->delete_stale;
	/*
	 * Mess with DCACHE_OP_DELETE because dput() will be faster without it.
	 * Don't care about races, either way it's just an optimization
	 */
	if ((!delete && (dentry->d_flags & DCACHE_OP_DELETE)) ||
	    (delete && !(dentry->d_flags & DCACHE_OP_DELETE))) {
		/* The flag only changes under d_lock */
		spin_lock(&dentry->d_lock);
		if (!delete)
			dentry->d_flags &= ~DCACHE_OP_DELETE;
		else
			dentry->d_flags |= DCACHE_OP_DELETE;
		spin_unlock(&dentry->d_lock);
	}

	((union fuse_dentry *) dentry->d_fsdata)->time = time;
}

53
static inline u64 fuse_dentry_time(const struct dentry *entry)
M
Miklos Szeredi 已提交
54
{
M
Miklos Szeredi 已提交
55
	return ((union fuse_dentry *) entry->d_fsdata)->time;
M
Miklos Szeredi 已提交
56 57
}

58 59 60
/*
 * FUSE caches dentries and attributes with separate timeout.  The
 * time in jiffies until the dentry/attributes are valid is stored in
 * dentry->d_fsdata and fuse_inode->i_time respectively.
 */

/*
 * Calculate the time in jiffies until a dentry/attributes are valid
 */
M
Miklos Szeredi 已提交
67
static u64 time_to_jiffies(u64 sec, u32 nsec)
68
{
M
Miklos Szeredi 已提交
69
	if (sec || nsec) {
M
Miklos Szeredi 已提交
70 71
		struct timespec64 ts = {
			sec,
72
			min_t(u32, nsec, NSEC_PER_SEC - 1)
M
Miklos Szeredi 已提交
73 74 75
		};

		return get_jiffies_64() + timespec64_to_jiffies(&ts);
M
Miklos Szeredi 已提交
76
	} else
M
Miklos Szeredi 已提交
77
		return 0;
78 79
}

80 81 82 83
/*
 * Set dentry and possibly attribute timeouts from the lookup/mk*
 * replies
 */
M
Miklos Szeredi 已提交
84
void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o)
85
{
M
Miklos Szeredi 已提交
86 87
	fuse_dentry_settime(entry,
		time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
88 89 90 91 92 93 94
}

/* Convert the attribute validity carried in a fuse_attr_out to jiffies. */
static u64 attr_timeout(struct fuse_attr_out *o)
{
	u64 valid_until = time_to_jiffies(o->attr_valid, o->attr_valid_nsec);

	return valid_until;
}

M
Miklos Szeredi 已提交
95
u64 entry_attr_timeout(struct fuse_entry_out *o)
96 97
{
	return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
98 99
}

100 101 102 103 104
/* OR the bits of @mask into the inode's inval_mask; no bits are cleared. */
static void fuse_invalidate_attr_mask(struct inode *inode, u32 mask)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	set_mask_bits(&fi->inval_mask, 0, mask);
}

105 106 107 108
/*
 * Mark the attributes as stale, so that at the next call to
 * ->getattr() they will be fetched from userspace
 */
109 110
void fuse_invalidate_attr(struct inode *inode)
{
111
	fuse_invalidate_attr_mask(inode, STATX_BASIC_STATS);
112 113
}

114 115 116 117 118 119
/*
 * Bookkeeping for a directory whose contents were modified: mark its
 * attributes stale and give its i_version a chance to be incremented.
 */
static void fuse_dir_changed(struct inode *dir)
{
	fuse_invalidate_attr(dir);
	inode_maybe_inc_iversion(dir, false);
}

120 121 122 123 124 125 126
/**
 * Mark the attributes as stale due to an atime change.  Avoid the invalidate if
 * atime is not used.
 */
void fuse_invalidate_atime(struct inode *inode)
{
	if (!IS_RDONLY(inode))
127
		fuse_invalidate_attr_mask(inode, STATX_ATIME);
128 129
}

130 131 132 133 134 135 136 137
/*
 * Just mark the entry as stale, so that a next attempt to look it up
 * will result in a new lookup call to userspace
 *
 * This is called when a dentry is about to become negative and the
 * timeout is unknown (unlink, rmdir, rename and in some cases
 * lookup)
 *
 * Implemented by setting the dentry time to zero.
 */
void fuse_invalidate_entry_cache(struct dentry *entry)
{
	fuse_dentry_settime(entry, 0);
}

143 144 145 146
/*
 * Same as fuse_invalidate_entry_cache(), but also try to remove the
 * dentry from the hash
 *
 * d_invalidate() is called first, then the dentry time is zeroed.
 */
static void fuse_invalidate_entry(struct dentry *entry)
{
	d_invalidate(entry);
	fuse_invalidate_entry_cache(entry);
}

153
static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
A
Al Viro 已提交
154
			     u64 nodeid, const struct qstr *name,
155 156
			     struct fuse_entry_out *outarg)
{
157
	memset(outarg, 0, sizeof(struct fuse_entry_out));
158 159 160 161 162 163 164 165
	args->opcode = FUSE_LOOKUP;
	args->nodeid = nodeid;
	args->in_numargs = 1;
	args->in_args[0].size = name->len + 1;
	args->in_args[0].value = name->name;
	args->out_numargs = 1;
	args->out_args[0].size = sizeof(struct fuse_entry_out);
	args->out_args[0].value = outarg;
166 167
}

168 169 170 171 172 173 174 175 176
/*
 * Check whether the dentry is still valid
 *
 * If the entry validity timeout has expired and the dentry is
 * positive, try to redo the lookup.  If the lookup results in a
 * different inode, then let the VFS invalidate the dentry and redo
 * the lookup once more.  If the lookup results in the same inode,
 * then refresh the attributes, timeouts and mark the dentry valid.
 */
177
static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
178
{
179
	struct inode *inode;
180 181
	struct dentry *parent;
	struct fuse_conn *fc;
M
Miklos Szeredi 已提交
182
	struct fuse_inode *fi;
183
	int ret;
184

185
	inode = d_inode_rcu(entry);
186
	if (inode && is_bad_inode(inode))
187
		goto invalid;
188 189
	else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
		 (flags & LOOKUP_REVAL)) {
190
		struct fuse_entry_out outarg;
191
		FUSE_ARGS(args);
192
		struct fuse_forget_link *forget;
193
		u64 attr_version;
194

195
		/* For negative dentries, always do a fresh lookup */
196
		if (!inode)
197
			goto invalid;
198

199
		ret = -ECHILD;
200
		if (flags & LOOKUP_RCU)
201
			goto out;
202

203
		fc = get_fuse_conn(inode);
204

205
		forget = fuse_alloc_forget();
206 207
		ret = -ENOMEM;
		if (!forget)
208
			goto out;
M
Miklos Szeredi 已提交
209

210
		attr_version = fuse_get_attr_version(fc);
211

212
		parent = dget_parent(entry);
213
		fuse_lookup_init(fc, &args, get_node_id(d_inode(parent)),
214
				 &entry->d_name, &outarg);
215
		ret = fuse_simple_request(fc, &args);
216
		dput(parent);
217
		/* Zero nodeid is same as -ENOENT */
218 219 220
		if (!ret && !outarg.nodeid)
			ret = -ENOENT;
		if (!ret) {
M
Miklos Szeredi 已提交
221
			fi = get_fuse_inode(inode);
222
			if (outarg.nodeid != get_node_id(inode)) {
223
				fuse_queue_forget(fc, forget, outarg.nodeid, 1);
224
				goto invalid;
225
			}
226
			spin_lock(&fi->lock);
M
Miklos Szeredi 已提交
227
			fi->nlookup++;
228
			spin_unlock(&fi->lock);
229
		}
230
		kfree(forget);
231 232 233
		if (ret == -ENOMEM)
			goto out;
		if (ret || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
234
			goto invalid;
235

S
Seth Forshee 已提交
236
		forget_all_cached_acls(inode);
237 238 239 240
		fuse_change_attributes(inode, &outarg.attr,
				       entry_attr_timeout(&outarg),
				       attr_version);
		fuse_change_entry_timeout(entry, &outarg);
241
	} else if (inode) {
M
Miklos Szeredi 已提交
242 243 244 245 246
		fi = get_fuse_inode(inode);
		if (flags & LOOKUP_RCU) {
			if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
				return -ECHILD;
		} else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
247
			parent = dget_parent(entry);
248
			fuse_advise_use_readdirplus(d_inode(parent));
249 250
			dput(parent);
		}
251
	}
252 253 254 255 256 257 258
	ret = 1;
out:
	return ret;

invalid:
	ret = 0;
	goto out;
259 260
}

M
Miklos Szeredi 已提交
261 262 263 264 265 266 267 268 269 270 271 272 273
/*
 * Allocate the zero-filled private data for a new dentry.
 *
 * Returns 0 on success or -ENOMEM if the allocation failed (d_fsdata is
 * then NULL).
 */
static int fuse_dentry_init(struct dentry *dentry)
{
	union fuse_dentry *fd = kzalloc(sizeof(union fuse_dentry), GFP_KERNEL);

	dentry->d_fsdata = fd;
	if (!fd)
		return -ENOMEM;

	return 0;
}
/* Free the dentry's private data through kfree_rcu(). */
static void fuse_dentry_release(struct dentry *dentry)
{
	union fuse_dentry *priv = dentry->d_fsdata;

	kfree_rcu(priv, rcu);
}

274 275 276 277 278
static int fuse_dentry_delete(const struct dentry *dentry)
{
	return time_before64(fuse_dentry_time(dentry), get_jiffies_64());
}

A
Al Viro 已提交
279
const struct dentry_operations fuse_dentry_operations = {
280
	.d_revalidate	= fuse_dentry_revalidate,
281
	.d_delete	= fuse_dentry_delete,
M
Miklos Szeredi 已提交
282 283
	.d_init		= fuse_dentry_init,
	.d_release	= fuse_dentry_release,
284 285
};

286 287 288 289 290
/*
 * Dentry operations table that only manages the private data
 * (init/release); it has no revalidate or delete hook.
 */
const struct dentry_operations fuse_root_dentry_operations = {
	.d_init		= fuse_dentry_init,
	.d_release	= fuse_dentry_release,
};

291
/*
 * Return non-zero if the file type bits of mode @m denote one of:
 * regular file, directory, symlink, character device, block device,
 * FIFO or socket.  Any other type value yields 0.
 */
int fuse_valid_type(int m)
{
	return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) ||
		S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m);
}

A
Al Viro 已提交
297
/*
 * Send a FUSE_LOOKUP for @name in the directory @nodeid and, on a
 * positive reply, obtain the matching inode.
 *
 * @sb:     superblock of the filesystem
 * @nodeid: node id of the parent directory
 * @name:   name to look up
 * @outarg: filled with the reply
 * @inode:  set to the inode on success, NULL otherwise
 *
 * Returns 0 on success.  A reply with a zero nodeid also returns 0 but
 * leaves *@inode NULL.  Errors: -ENAMETOOLONG if the name exceeds
 * FUSE_NAME_MAX, -ENOMEM on allocation failure or when fuse_iget()
 * fails, -EIO if the reply carries an invalid file type, or the error
 * from the request itself.
 */
int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
		     struct fuse_entry_out *outarg, struct inode **inode)
{
	struct fuse_conn *fc = get_fuse_conn_super(sb);
	FUSE_ARGS(args);
	struct fuse_forget_link *forget;
	u64 attr_version;
	int err;

	*inode = NULL;
	err = -ENAMETOOLONG;
	if (name->len > FUSE_NAME_MAX)
		goto out;

	forget = fuse_alloc_forget();
	err = -ENOMEM;
	if (!forget)
		goto out;

	attr_version = fuse_get_attr_version(fc);

	fuse_lookup_init(fc, &args, nodeid, name, outarg);
	err = fuse_simple_request(fc, &args);
	/* Zero nodeid is same as -ENOENT, but with valid timeout */
	if (err || !outarg->nodeid)
		goto out_put_forget;

	/* nodeid is known to be non-zero from here on */
	err = -EIO;
	if (!fuse_valid_type(outarg->attr.mode))
		goto out_put_forget;

	*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
			   &outarg->attr, entry_attr_timeout(outarg),
			   attr_version);
	err = -ENOMEM;
	if (!*inode) {
		/* The forget link is handed over here, so it is not freed below */
		fuse_queue_forget(fc, forget, outarg->nodeid, 1);
		goto out;
	}
	err = 0;

 out_put_forget:
	kfree(forget);
 out:
	return err;
}

static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
A
Al Viro 已提交
348
				  unsigned int flags)
349 350 351 352 353 354
{
	int err;
	struct fuse_entry_out outarg;
	struct inode *inode;
	struct dentry *newent;
	bool outarg_valid = true;
355
	bool locked;
356

357
	locked = fuse_lock_inode(dir);
358 359
	err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
			       &outarg, &inode);
360
	fuse_unlock_inode(dir, locked);
361 362 363 364 365 366 367 368 369 370
	if (err == -ENOENT) {
		outarg_valid = false;
		err = 0;
	}
	if (err)
		goto out_err;

	err = -EIO;
	if (inode && get_node_id(inode) == FUSE_ROOT_ID)
		goto out_iput;
371

372
	newent = d_splice_alias(inode, entry);
373 374 375
	err = PTR_ERR(newent);
	if (IS_ERR(newent))
		goto out_err;
376

377
	entry = newent ? newent : entry;
378
	if (outarg_valid)
379
		fuse_change_entry_timeout(entry, &outarg);
380 381
	else
		fuse_invalidate_entry_cache(entry);
382

383
	fuse_advise_use_readdirplus(dir);
384
	return newent;
385 386 387 388 389

 out_iput:
	iput(inode);
 out_err:
	return ERR_PTR(err);
390 391
}

392 393 394 395 396 397
/*
 * Atomic create+open operation
 *
 * If the filesystem doesn't support this, then fall back to separate
 * 'mknod' + 'open' requests.
 */
A
Al Viro 已提交
398
static int fuse_create_open(struct inode *dir, struct dentry *entry,
A
Al Viro 已提交
399
			    struct file *file, unsigned flags,
400
			    umode_t mode)
M
Miklos Szeredi 已提交
401 402 403 404
{
	int err;
	struct inode *inode;
	struct fuse_conn *fc = get_fuse_conn(dir);
405
	FUSE_ARGS(args);
406
	struct fuse_forget_link *forget;
407
	struct fuse_create_in inarg;
M
Miklos Szeredi 已提交
408 409
	struct fuse_open_out outopen;
	struct fuse_entry_out outentry;
410
	struct fuse_inode *fi;
M
Miklos Szeredi 已提交
411 412
	struct fuse_file *ff;

413 414 415
	/* Userspace expects S_IFREG in create mode */
	BUG_ON((mode & S_IFMT) != S_IFREG);

416
	forget = fuse_alloc_forget();
417
	err = -ENOMEM;
418
	if (!forget)
419
		goto out_err;
420

421
	err = -ENOMEM;
T
Tejun Heo 已提交
422
	ff = fuse_file_alloc(fc);
M
Miklos Szeredi 已提交
423
	if (!ff)
424
		goto out_put_forget_req;
M
Miklos Szeredi 已提交
425

426 427 428
	if (!fc->dont_mask)
		mode &= ~current_umask();

M
Miklos Szeredi 已提交
429 430
	flags &= ~O_NOCTTY;
	memset(&inarg, 0, sizeof(inarg));
431
	memset(&outentry, 0, sizeof(outentry));
M
Miklos Szeredi 已提交
432 433
	inarg.flags = flags;
	inarg.mode = mode;
434
	inarg.umask = current_umask();
435 436 437 438 439 440 441 442 443 444 445 446
	args.opcode = FUSE_CREATE;
	args.nodeid = get_node_id(dir);
	args.in_numargs = 2;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.in_args[1].size = entry->d_name.len + 1;
	args.in_args[1].value = entry->d_name.name;
	args.out_numargs = 2;
	args.out_args[0].size = sizeof(outentry);
	args.out_args[0].value = &outentry;
	args.out_args[1].size = sizeof(outopen);
	args.out_args[1].value = &outopen;
447
	err = fuse_simple_request(fc, &args);
448
	if (err)
M
Miklos Szeredi 已提交
449 450 451
		goto out_free_ff;

	err = -EIO;
452
	if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid))
M
Miklos Szeredi 已提交
453 454
		goto out_free_ff;

455 456 457
	ff->fh = outopen.fh;
	ff->nodeid = outentry.nodeid;
	ff->open_flags = outopen.open_flags;
M
Miklos Szeredi 已提交
458
	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
459
			  &outentry.attr, entry_attr_timeout(&outentry), 0);
M
Miklos Szeredi 已提交
460 461
	if (!inode) {
		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
462
		fuse_sync_release(NULL, ff, flags);
463
		fuse_queue_forget(fc, forget, outentry.nodeid, 1);
464 465
		err = -ENOMEM;
		goto out_err;
M
Miklos Szeredi 已提交
466
	}
467
	kfree(forget);
M
Miklos Szeredi 已提交
468
	d_instantiate(entry, inode);
469
	fuse_change_entry_timeout(entry, &outentry);
470
	fuse_dir_changed(dir);
471
	err = finish_open(file, entry, generic_file_open);
A
Al Viro 已提交
472
	if (err) {
473 474
		fi = get_fuse_inode(inode);
		fuse_sync_release(fi, ff, flags);
475
	} else {
476
		file->private_data = ff;
477
		fuse_finish_open(inode, file);
M
Miklos Szeredi 已提交
478
	}
A
Al Viro 已提交
479
	return err;
M
Miklos Szeredi 已提交
480

481
out_free_ff:
M
Miklos Szeredi 已提交
482
	fuse_file_free(ff);
483
out_put_forget_req:
484
	kfree(forget);
485
out_err:
A
Al Viro 已提交
486
	return err;
487 488 489
}

static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t);
A
Al Viro 已提交
490
static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
A
Al Viro 已提交
491
			    struct file *file, unsigned flags,
492
			    umode_t mode)
493 494 495 496 497
{
	int err;
	struct fuse_conn *fc = get_fuse_conn(dir);
	struct dentry *res = NULL;

498
	if (d_in_lookup(entry)) {
A
Al Viro 已提交
499
		res = fuse_lookup(dir, entry, 0);
500
		if (IS_ERR(res))
A
Al Viro 已提交
501
			return PTR_ERR(res);
502 503 504 505 506

		if (res)
			entry = res;
	}

507
	if (!(flags & O_CREAT) || d_really_is_positive(entry))
508 509 510
		goto no_open;

	/* Only creates */
511
	file->f_mode |= FMODE_CREATED;
512 513 514 515

	if (fc->no_create)
		goto mknod;

516
	err = fuse_create_open(dir, entry, file, flags, mode);
A
Al Viro 已提交
517
	if (err == -ENOSYS) {
518 519 520 521 522
		fc->no_create = 1;
		goto mknod;
	}
out_dput:
	dput(res);
A
Al Viro 已提交
523
	return err;
524 525 526

mknod:
	err = fuse_mknod(dir, entry, mode, 0);
A
Al Viro 已提交
527
	if (err)
528 529
		goto out_dput;
no_open:
A
Al Viro 已提交
530
	return finish_no_open(file, res);
M
Miklos Szeredi 已提交
531 532
}

533 534 535
/*
 * Code shared between mknod, mkdir, symlink and link
 */
536
static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
537
			    struct inode *dir, struct dentry *entry,
A
Al Viro 已提交
538
			    umode_t mode)
539 540 541
{
	struct fuse_entry_out outarg;
	struct inode *inode;
A
Al Viro 已提交
542
	struct dentry *d;
543
	int err;
544
	struct fuse_forget_link *forget;
M
Miklos Szeredi 已提交
545

546
	forget = fuse_alloc_forget();
547
	if (!forget)
548
		return -ENOMEM;
549

550
	memset(&outarg, 0, sizeof(outarg));
551 552 553 554
	args->nodeid = get_node_id(dir);
	args->out_numargs = 1;
	args->out_args[0].size = sizeof(outarg);
	args->out_args[0].value = &outarg;
555
	err = fuse_simple_request(fc, args);
M
Miklos Szeredi 已提交
556 557 558
	if (err)
		goto out_put_forget_req;

559 560
	err = -EIO;
	if (invalid_nodeid(outarg.nodeid))
M
Miklos Szeredi 已提交
561
		goto out_put_forget_req;
562 563

	if ((outarg.attr.mode ^ mode) & S_IFMT)
M
Miklos Szeredi 已提交
564
		goto out_put_forget_req;
565

566
	inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
567
			  &outarg.attr, entry_attr_timeout(&outarg), 0);
568
	if (!inode) {
569
		fuse_queue_forget(fc, forget, outarg.nodeid, 1);
570 571
		return -ENOMEM;
	}
572
	kfree(forget);
573

A
Al Viro 已提交
574 575 576 577
	d_drop(entry);
	d = d_splice_alias(inode, entry);
	if (IS_ERR(d))
		return PTR_ERR(d);
578

A
Al Viro 已提交
579 580 581 582 583 584
	if (d) {
		fuse_change_entry_timeout(d, &outarg);
		dput(d);
	} else {
		fuse_change_entry_timeout(entry, &outarg);
	}
585
	fuse_dir_changed(dir);
586
	return 0;
587

M
Miklos Szeredi 已提交
588
 out_put_forget_req:
589
	kfree(forget);
590
	return err;
591 592
}

A
Al Viro 已提交
593
static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
594 595 596 597
		      dev_t rdev)
{
	struct fuse_mknod_in inarg;
	struct fuse_conn *fc = get_fuse_conn(dir);
598
	FUSE_ARGS(args);
599

600 601 602
	if (!fc->dont_mask)
		mode &= ~current_umask();

603 604 605
	memset(&inarg, 0, sizeof(inarg));
	inarg.mode = mode;
	inarg.rdev = new_encode_dev(rdev);
606
	inarg.umask = current_umask();
607 608 609 610 611 612
	args.opcode = FUSE_MKNOD;
	args.in_numargs = 2;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.in_args[1].size = entry->d_name.len + 1;
	args.in_args[1].value = entry->d_name.name;
613
	return create_new_entry(fc, &args, dir, entry, mode);
614 615
}

A
Al Viro 已提交
616
static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
A
Al Viro 已提交
617
		       bool excl)
618 619 620 621
{
	return fuse_mknod(dir, entry, mode, 0);
}

622
/*
 * Create a directory named by @entry in @dir through a FUSE_MKDIR
 * request.
 *
 * Unless the connection has dont_mask set, the current umask is applied
 * to @mode before sending; the umask itself is always sent as well.
 * The reply is required to describe a directory (S_IFDIR).
 */
static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
{
	struct fuse_mkdir_in inarg;
	struct fuse_conn *fc = get_fuse_conn(dir);
	FUSE_ARGS(args);

	if (!fc->dont_mask)
		mode &= ~current_umask();

	memset(&inarg, 0, sizeof(inarg));
	inarg.mode = mode;
	inarg.umask = current_umask();
	args.opcode = FUSE_MKDIR;
	args.in_numargs = 2;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.in_args[1].size = entry->d_name.len + 1;
	args.in_args[1].value = entry->d_name.name;
	return create_new_entry(fc, &args, dir, entry, S_IFDIR);
}

static int fuse_symlink(struct inode *dir, struct dentry *entry,
			const char *link)
{
	struct fuse_conn *fc = get_fuse_conn(dir);
	unsigned len = strlen(link) + 1;
648
	FUSE_ARGS(args);
649

650 651 652 653 654 655
	args.opcode = FUSE_SYMLINK;
	args.in_numargs = 2;
	args.in_args[0].size = entry->d_name.len + 1;
	args.in_args[0].value = entry->d_name.name;
	args.in_args[1].size = len;
	args.in_args[1].value = link;
656
	return create_new_entry(fc, &args, dir, entry, S_IFLNK);
657 658
}

S
Seth Forshee 已提交
659
void fuse_update_ctime(struct inode *inode)
M
Maxim Patlasov 已提交
660 661
{
	if (!IS_NOCMTIME(inode)) {
662
		inode->i_ctime = current_time(inode);
M
Maxim Patlasov 已提交
663 664 665 666
		mark_inode_dirty_sync(inode);
	}
}

667 668 669 670
static int fuse_unlink(struct inode *dir, struct dentry *entry)
{
	int err;
	struct fuse_conn *fc = get_fuse_conn(dir);
671 672
	FUSE_ARGS(args);

673 674 675 676 677
	args.opcode = FUSE_UNLINK;
	args.nodeid = get_node_id(dir);
	args.in_numargs = 1;
	args.in_args[0].size = entry->d_name.len + 1;
	args.in_args[0].value = entry->d_name.name;
678
	err = fuse_simple_request(fc, &args);
679
	if (!err) {
680
		struct inode *inode = d_inode(entry);
M
Miklos Szeredi 已提交
681
		struct fuse_inode *fi = get_fuse_inode(inode);
682

683
		spin_lock(&fi->lock);
684
		fi->attr_version = atomic64_inc_return(&fc->attr_version);
685 686 687 688 689 690 691 692
		/*
		 * If i_nlink == 0 then unlink doesn't make sense, yet this can
		 * happen if userspace filesystem is careless.  It would be
		 * difficult to enforce correct nlink usage so just ignore this
		 * condition here
		 */
		if (inode->i_nlink > 0)
			drop_nlink(inode);
693
		spin_unlock(&fi->lock);
694
		fuse_invalidate_attr(inode);
695
		fuse_dir_changed(dir);
696
		fuse_invalidate_entry_cache(entry);
M
Maxim Patlasov 已提交
697
		fuse_update_ctime(inode);
698 699 700 701 702 703 704 705 706
	} else if (err == -EINTR)
		fuse_invalidate_entry(entry);
	return err;
}

static int fuse_rmdir(struct inode *dir, struct dentry *entry)
{
	int err;
	struct fuse_conn *fc = get_fuse_conn(dir);
707 708
	FUSE_ARGS(args);

709 710 711 712 713
	args.opcode = FUSE_RMDIR;
	args.nodeid = get_node_id(dir);
	args.in_numargs = 1;
	args.in_args[0].size = entry->d_name.len + 1;
	args.in_args[0].value = entry->d_name.name;
714
	err = fuse_simple_request(fc, &args);
715
	if (!err) {
716
		clear_nlink(d_inode(entry));
717
		fuse_dir_changed(dir);
718
		fuse_invalidate_entry_cache(entry);
719 720 721 722 723
	} else if (err == -EINTR)
		fuse_invalidate_entry(entry);
	return err;
}

M
Miklos Szeredi 已提交
724 725 726
/*
 * Shared implementation of the rename requests.
 *
 * @opcode selects the request and @argsize is the number of bytes of
 * the input structure to zero and send: a fuse_rename2_in is used as
 * the buffer and the caller passes the size of the structure that
 * belongs to @opcode.
 *
 * Returns the result of the request.  On success the cached state of
 * the affected inodes, directories and dentries is invalidated; on
 * -EINTR both entries are invalidated.
 */
static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
			      struct inode *newdir, struct dentry *newent,
			      unsigned int flags, int opcode, size_t argsize)
{
	int err;
	struct fuse_rename2_in inarg;
	struct fuse_conn *fc = get_fuse_conn(olddir);
	FUSE_ARGS(args);

	memset(&inarg, 0, argsize);
	inarg.newdir = get_node_id(newdir);
	inarg.flags = flags;
	args.opcode = opcode;
	args.nodeid = get_node_id(olddir);
	args.in_numargs = 3;
	args.in_args[0].size = argsize;
	args.in_args[0].value = &inarg;
	args.in_args[1].size = oldent->d_name.len + 1;
	args.in_args[1].value = oldent->d_name.name;
	args.in_args[2].size = newent->d_name.len + 1;
	args.in_args[2].value = newent->d_name.name;
	err = fuse_simple_request(fc, &args);
	if (!err) {
		/* ctime changes */
		fuse_invalidate_attr(d_inode(oldent));
		fuse_update_ctime(d_inode(oldent));

		if (flags & RENAME_EXCHANGE) {
			fuse_invalidate_attr(d_inode(newent));
			fuse_update_ctime(d_inode(newent));
		}

		fuse_dir_changed(olddir);
		if (olddir != newdir)
			fuse_dir_changed(newdir);

		/* newent will end up negative */
		if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent)) {
			fuse_invalidate_attr(d_inode(newent));
			fuse_invalidate_entry_cache(newent);
			fuse_update_ctime(d_inode(newent));
		}
	} else if (err == -EINTR) {
		/* If request was interrupted, DEITY only knows if the
		   rename actually took place.  If the invalidation
		   fails (e.g. some process has CWD under the renamed
		   directory), then there can be inconsistency between
		   the dcache and the real filesystem.  Tough luck. */
		fuse_invalidate_entry(oldent);
		if (d_really_is_positive(newent))
			fuse_invalidate_entry(newent);
	}

	return err;
}

M
Miklos Szeredi 已提交
780 781 782 783 784 785 786 787 788 789
static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
			struct inode *newdir, struct dentry *newent,
			unsigned int flags)
{
	struct fuse_conn *fc = get_fuse_conn(olddir);
	int err;

	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
		return -EINVAL;

M
Miklos Szeredi 已提交
790 791 792
	if (flags) {
		if (fc->no_rename2 || fc->minor < 23)
			return -EINVAL;
M
Miklos Szeredi 已提交
793

M
Miklos Szeredi 已提交
794 795 796 797 798 799 800 801 802 803 804
		err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
					 FUSE_RENAME2,
					 sizeof(struct fuse_rename2_in));
		if (err == -ENOSYS) {
			fc->no_rename2 = 1;
			err = -EINVAL;
		}
	} else {
		err = fuse_rename_common(olddir, oldent, newdir, newent, 0,
					 FUSE_RENAME,
					 sizeof(struct fuse_rename_in));
M
Miklos Szeredi 已提交
805
	}
M
Miklos Szeredi 已提交
806

M
Miklos Szeredi 已提交
807
	return err;
M
Miklos Szeredi 已提交
808
}
M
Miklos Szeredi 已提交
809

810 811 812 813 814
static int fuse_link(struct dentry *entry, struct inode *newdir,
		     struct dentry *newent)
{
	int err;
	struct fuse_link_in inarg;
815
	struct inode *inode = d_inode(entry);
816
	struct fuse_conn *fc = get_fuse_conn(inode);
817
	FUSE_ARGS(args);
818 819 820

	memset(&inarg, 0, sizeof(inarg));
	inarg.oldnodeid = get_node_id(inode);
821 822 823 824 825 826
	args.opcode = FUSE_LINK;
	args.in_numargs = 2;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.in_args[1].size = newent->d_name.len + 1;
	args.in_args[1].value = newent->d_name.name;
827
	err = create_new_entry(fc, &args, newdir, newent, inode->i_mode);
828 829 830 831 832 833
	/* Contrary to "normal" filesystems it can happen that link
	   makes two "logical" inodes point to the same "physical"
	   inode.  We invalidate the attributes of the old one, so it
	   will reflect changes in the backing inode (link count,
	   etc.)
	*/
M
Miklos Szeredi 已提交
834 835 836
	if (!err) {
		struct fuse_inode *fi = get_fuse_inode(inode);

837
		spin_lock(&fi->lock);
838
		fi->attr_version = atomic64_inc_return(&fc->attr_version);
M
Miklos Szeredi 已提交
839
		inc_nlink(inode);
840
		spin_unlock(&fi->lock);
841
		fuse_invalidate_attr(inode);
M
Maxim Patlasov 已提交
842
		fuse_update_ctime(inode);
M
Miklos Szeredi 已提交
843 844 845
	} else if (err == -EINTR) {
		fuse_invalidate_attr(inode);
	}
846 847 848
	return err;
}

849 850 851
static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
			  struct kstat *stat)
{
M
Miklos Szeredi 已提交
852
	unsigned int blkbits;
P
Pavel Emelyanov 已提交
853 854 855
	struct fuse_conn *fc = get_fuse_conn(inode);

	/* see the comment in fuse_change_attributes() */
M
Maxim Patlasov 已提交
856
	if (fc->writeback_cache && S_ISREG(inode->i_mode)) {
P
Pavel Emelyanov 已提交
857
		attr->size = i_size_read(inode);
M
Maxim Patlasov 已提交
858 859
		attr->mtime = inode->i_mtime.tv_sec;
		attr->mtimensec = inode->i_mtime.tv_nsec;
M
Maxim Patlasov 已提交
860 861
		attr->ctime = inode->i_ctime.tv_sec;
		attr->ctimensec = inode->i_ctime.tv_nsec;
M
Maxim Patlasov 已提交
862
	}
M
Miklos Szeredi 已提交
863

864 865 866 867
	stat->dev = inode->i_sb->s_dev;
	stat->ino = attr->ino;
	stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
	stat->nlink = attr->nlink;
868 869
	stat->uid = make_kuid(fc->user_ns, attr->uid);
	stat->gid = make_kgid(fc->user_ns, attr->gid);
870 871 872 873 874 875 876 877 878
	stat->rdev = inode->i_rdev;
	stat->atime.tv_sec = attr->atime;
	stat->atime.tv_nsec = attr->atimensec;
	stat->mtime.tv_sec = attr->mtime;
	stat->mtime.tv_nsec = attr->mtimensec;
	stat->ctime.tv_sec = attr->ctime;
	stat->ctime.tv_nsec = attr->ctimensec;
	stat->size = attr->size;
	stat->blocks = attr->blocks;
M
Miklos Szeredi 已提交
879 880 881 882 883 884 885

	if (attr->blksize != 0)
		blkbits = ilog2(attr->blksize);
	else
		blkbits = inode->i_sb->s_blocksize_bits;

	stat->blksize = 1 << blkbits;
886 887
}

888 889
static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
			   struct file *file)
890 891
{
	int err;
892 893
	struct fuse_getattr_in inarg;
	struct fuse_attr_out outarg;
894
	struct fuse_conn *fc = get_fuse_conn(inode);
895
	FUSE_ARGS(args);
896 897
	u64 attr_version;

898
	attr_version = fuse_get_attr_version(fc);
899

900
	memset(&inarg, 0, sizeof(inarg));
901
	memset(&outarg, 0, sizeof(outarg));
902 903 904 905 906 907 908
	/* Directories have separate file-handle space */
	if (file && S_ISREG(inode->i_mode)) {
		struct fuse_file *ff = file->private_data;

		inarg.getattr_flags |= FUSE_GETATTR_FH;
		inarg.fh = ff->fh;
	}
909 910 911 912 913 914 915 916
	args.opcode = FUSE_GETATTR;
	args.nodeid = get_node_id(inode);
	args.in_numargs = 1;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.out_numargs = 1;
	args.out_args[0].size = sizeof(outarg);
	args.out_args[0].value = &outarg;
917
	err = fuse_simple_request(fc, &args);
918
	if (!err) {
919
		if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
920 921 922
			make_bad_inode(inode);
			err = -EIO;
		} else {
923 924
			fuse_change_attributes(inode, &outarg.attr,
					       attr_timeout(&outarg),
925 926
					       attr_version);
			if (stat)
927
				fuse_fillattr(inode, &outarg.attr, stat);
928 929 930 931 932
		}
	}
	return err;
}

M
Miklos Szeredi 已提交
933
/*
 * Refresh the attributes of @inode if needed and optionally fill @stat.
 *
 * A request to userspace is made when, in order of precedence:
 *  - AT_STATX_FORCE_SYNC is set in @flags; or
 *  - AT_STATX_DONT_SYNC is not set and either a bit of @request_mask is
 *    marked invalid in the inode, or the cached attribute time
 *    (fi->i_time) lies before the current jiffies value.
 *
 * Otherwise @stat (if given) is filled from the inode, with mode and
 * ino replaced by the fuse inode's orig_i_mode and orig_ino.
 *
 * Returns 0, or the error from fuse_do_getattr().
 */
static int fuse_update_get_attr(struct inode *inode, struct file *file,
				struct kstat *stat, u32 request_mask,
				unsigned int flags)
{
	struct fuse_inode *fi = get_fuse_inode(inode);
	int err = 0;
	bool sync;

	if (flags & AT_STATX_FORCE_SYNC)
		sync = true;
	else if (flags & AT_STATX_DONT_SYNC)
		sync = false;
	else if (request_mask & READ_ONCE(fi->inval_mask))
		sync = true;
	else
		sync = time_before64(fi->i_time, get_jiffies_64());

	if (sync) {
		forget_all_cached_acls(inode);
		err = fuse_do_getattr(inode, stat, file);
	} else if (stat) {
		generic_fillattr(inode, stat);
		stat->mode = fi->orig_i_mode;
		stat->ino = fi->orig_ino;
	}

	return err;
}

M
Miklos Szeredi 已提交
962 963
int fuse_update_attributes(struct inode *inode, struct file *file)
{
964 965 966
	/* Do *not* need to get atime for internal purposes */
	return fuse_update_get_attr(inode, file, NULL,
				    STATX_BASIC_STATS & ~STATX_ATIME, 0);
M
Miklos Szeredi 已提交
967 968
}

J
John Muir 已提交
969
/*
 * Invalidate the dentry @name in the directory identified by
 * @parent_nodeid.
 *
 * If @child_nodeid is non-zero and the dentry is positive, the dentry
 * is additionally deleted, provided that its inode has that node id, it
 * is not a mountpoint and, for a directory, it is empty.
 *
 * @name->hash is recomputed here, so @name is modified.
 *
 * Returns 0 on success, -ENOENT if the parent inode, an alias for it,
 * or the entry cannot be found or the child node id does not match,
 * -ENOTDIR if the parent is not a directory, -EBUSY if the entry is a
 * mountpoint, -ENOTEMPTY if it is a non-empty directory.
 */
int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
			     u64 child_nodeid, struct qstr *name)
{
	int err = -ENOTDIR;
	struct inode *parent;
	struct dentry *dir;
	struct dentry *entry;

	parent = ilookup5(sb, parent_nodeid, fuse_inode_eq, &parent_nodeid);
	if (!parent)
		return -ENOENT;

	inode_lock(parent);
	if (!S_ISDIR(parent->i_mode))
		goto unlock;

	err = -ENOENT;
	dir = d_find_alias(parent);
	if (!dir)
		goto unlock;

	name->hash = full_name_hash(dir, name->name, name->len);
	entry = d_lookup(dir, name);
	dput(dir);
	if (!entry)
		goto unlock;

	fuse_dir_changed(parent);
	fuse_invalidate_entry(entry);

	if (child_nodeid != 0 && d_really_is_positive(entry)) {
		inode_lock(d_inode(entry));
		if (get_node_id(d_inode(entry)) != child_nodeid) {
			err = -ENOENT;
			goto badentry;
		}
		if (d_mountpoint(entry)) {
			err = -EBUSY;
			goto badentry;
		}
		if (d_is_dir(entry)) {
			shrink_dcache_parent(entry);
			if (!simple_empty(entry)) {
				err = -ENOTEMPTY;
				goto badentry;
			}
			d_inode(entry)->i_flags |= S_DEAD;
		}
		dont_mount(entry);
		clear_nlink(d_inode(entry));
		err = 0;
 badentry:
		inode_unlock(d_inode(entry));
		/* The delete itself happens after the child lock is dropped */
		if (!err)
			d_delete(entry);
	} else {
		err = 0;
	}
	dput(entry);

 unlock:
	inode_unlock(parent);
	iput(parent);
	return err;
}

1035 1036
/*
 * Calling into a user-controlled filesystem gives the filesystem
1037
 * daemon ptrace-like capabilities over the current process.  This
1038 1039 1040 1041 1042 1043 1044 1045 1046 1047
 * means, that the filesystem daemon is able to record the exact
 * filesystem operations performed, and can also control the behavior
 * of the requester process in otherwise impossible ways.  For example
 * it can delay the operation for arbitrary length of time allowing
 * DoS against the requester.
 *
 * For this reason only those processes can call into the filesystem,
 * for which the owner of the mount has ptrace privilege.  This
 * excludes processes started by other users, suid or sgid processes.
 */
1048
int fuse_allow_current_process(struct fuse_conn *fc)
1049
{
1050
	const struct cred *cred;
1051

M
Miklos Szeredi 已提交
1052
	if (fc->allow_other)
1053
		return current_in_userns(fc->user_ns);
1054

1055
	cred = current_cred();
1056 1057 1058 1059 1060 1061
	if (uid_eq(cred->euid, fc->user_id) &&
	    uid_eq(cred->suid, fc->user_id) &&
	    uid_eq(cred->uid,  fc->user_id) &&
	    gid_eq(cred->egid, fc->group_id) &&
	    gid_eq(cred->sgid, fc->group_id) &&
	    gid_eq(cred->gid,  fc->group_id))
1062
		return 1;
1063

1064
	return 0;
1065 1066
}

M
Miklos Szeredi 已提交
1067 1068 1069
static int fuse_access(struct inode *inode, int mask)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
1070
	FUSE_ARGS(args);
M
Miklos Szeredi 已提交
1071 1072 1073
	struct fuse_access_in inarg;
	int err;

1074 1075
	BUG_ON(mask & MAY_NOT_BLOCK);

M
Miklos Szeredi 已提交
1076 1077 1078 1079
	if (fc->no_access)
		return 0;

	memset(&inarg, 0, sizeof(inarg));
1080
	inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
1081 1082 1083 1084 1085
	args.opcode = FUSE_ACCESS;
	args.nodeid = get_node_id(inode);
	args.in_numargs = 1;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
1086
	err = fuse_simple_request(fc, &args);
M
Miklos Szeredi 已提交
1087 1088 1089 1090 1091 1092 1093
	if (err == -ENOSYS) {
		fc->no_access = 1;
		err = 0;
	}
	return err;
}

1094
static int fuse_perm_getattr(struct inode *inode, int mask)
1095
{
1096
	if (mask & MAY_NOT_BLOCK)
1097 1098
		return -ECHILD;

S
Seth Forshee 已提交
1099
	forget_all_cached_acls(inode);
1100 1101 1102
	return fuse_do_getattr(inode, NULL, NULL);
}

1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115
/*
 * Check permission.  The two basic access models of FUSE are:
 *
 * 1) Local access checking ('default_permissions' mount option) based
 * on file mode.  This is the plain old disk filesystem permission
 * modell.
 *
 * 2) "Remote" access checking, where server is responsible for
 * checking permission in each inode operation.  An exception to this
 * is if ->permission() was invoked from sys_access() in which case an
 * access request is sent.  Execute permission is still checked
 * locally based on file mode.
 */
1116
static int fuse_permission(struct inode *inode, int mask)
1117 1118
{
	struct fuse_conn *fc = get_fuse_conn(inode);
1119 1120
	bool refreshed = false;
	int err = 0;
1121

1122
	if (!fuse_allow_current_process(fc))
1123
		return -EACCES;
1124 1125

	/*
1126
	 * If attributes are needed, refresh them before proceeding
1127
	 */
M
Miklos Szeredi 已提交
1128
	if (fc->default_permissions ||
1129
	    ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
1130
		struct fuse_inode *fi = get_fuse_inode(inode);
1131
		u32 perm_mask = STATX_MODE | STATX_UID | STATX_GID;
1132

1133 1134
		if (perm_mask & READ_ONCE(fi->inval_mask) ||
		    time_before64(fi->i_time, get_jiffies_64())) {
1135 1136
			refreshed = true;

1137
			err = fuse_perm_getattr(inode, mask);
1138 1139 1140
			if (err)
				return err;
		}
1141 1142
	}

M
Miklos Szeredi 已提交
1143
	if (fc->default_permissions) {
1144
		err = generic_permission(inode, mask);
M
Miklos Szeredi 已提交
1145 1146 1147 1148

		/* If permission is denied, try to refresh file
		   attributes.  This is also needed, because the root
		   node will at first have no permissions */
1149
		if (err == -EACCES && !refreshed) {
1150
			err = fuse_perm_getattr(inode, mask);
M
Miklos Szeredi 已提交
1151
			if (!err)
1152
				err = generic_permission(inode, mask);
M
Miklos Szeredi 已提交
1153 1154
		}

1155 1156 1157 1158
		/* Note: the opposite of the above test does not
		   exist.  So if permissions are revoked this won't be
		   noticed immediately, only after the attribute
		   timeout has expired */
E
Eric Paris 已提交
1159
	} else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
1160 1161 1162 1163 1164 1165
		err = fuse_access(inode, mask);
	} else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
		if (!(inode->i_mode & S_IXUGO)) {
			if (refreshed)
				return -EACCES;

1166
			err = fuse_perm_getattr(inode, mask);
1167 1168 1169
			if (!err && !(inode->i_mode & S_IXUGO))
				return -EACCES;
		}
1170
	}
1171
	return err;
1172 1173
}

D
Dan Schatzberg 已提交
1174
static int fuse_readlink_page(struct inode *inode, struct page *page)
1175 1176
{
	struct fuse_conn *fc = get_fuse_conn(inode);
1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193
	struct fuse_page_desc desc = { .length = PAGE_SIZE - 1 };
	struct fuse_args_pages ap = {
		.num_pages = 1,
		.pages = &page,
		.descs = &desc,
	};
	char *link;
	ssize_t res;

	ap.args.opcode = FUSE_READLINK;
	ap.args.nodeid = get_node_id(inode);
	ap.args.out_pages = true;
	ap.args.out_argvar = true;
	ap.args.page_zeroing = true;
	ap.args.out_numargs = 1;
	ap.args.out_args[0].size = desc.length;
	res = fuse_simple_request(fc, &ap.args);
1194

1195
	fuse_invalidate_atime(inode);
1196

1197 1198
	if (res < 0)
		return res;
1199

1200 1201
	if (WARN_ON(res >= PAGE_SIZE))
		return -EIO;
D
Dan Schatzberg 已提交
1202

1203 1204
	link = page_address(page);
	link[res] = '\0';
D
Dan Schatzberg 已提交
1205

1206
	return 0;
D
Dan Schatzberg 已提交
1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243
}

/*
 * ->get_link() for FUSE symlinks.
 *
 * With cache_symlinks enabled the lookup is delegated to
 * page_get_link().  Otherwise a fresh page is allocated, filled by
 * fuse_readlink_page() and handed to @callback so that page_put_link()
 * releases it later.
 *
 * Returns the link target, or an ERR_PTR: -EIO for a bad inode,
 * -ECHILD when called without a dentry, -ENOMEM if no page could be
 * allocated, or the error from reading the link.
 */
static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
				 struct delayed_call *callback)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct page *page;
	int err;

	if (is_bad_inode(inode))
		return ERR_PTR(-EIO);

	if (fc->cache_symlinks)
		return page_get_link(dentry, inode, callback);

	if (!dentry)
		return ERR_PTR(-ECHILD);

	page = alloc_page(GFP_KERNEL);
	if (!page)
		return ERR_PTR(-ENOMEM);

	err = fuse_readlink_page(inode, page);
	if (err) {
		__free_page(page);
		return ERR_PTR(err);
	}

	set_delayed_call(callback, page_put_link, page);

	return page_address(page);
}

static int fuse_dir_open(struct inode *inode, struct file *file)
{
1248
	return fuse_open_common(inode, file, true);
1249 1250 1251 1252
}

static int fuse_dir_release(struct inode *inode, struct file *file)
{
1253
	fuse_release_common(file, true);
1254 1255

	return 0;
1256 1257
}

1258 1259
/*
 * ->fsync() for directories: issue FUSE_FSYNCDIR under the inode lock.
 *
 * Returns -EIO for a bad inode and 0 without sending anything once the
 * connection is flagged no_fsyncdir.  A -ENOSYS result sets that flag
 * and is reported as success.
 */
static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
			  int datasync)
{
	struct inode *inode = file->f_mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);
	int ret;

	if (is_bad_inode(inode))
		return -EIO;

	if (fc->no_fsyncdir)
		return 0;

	inode_lock(inode);

	ret = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNCDIR);
	if (ret == -ENOSYS) {
		/* Not implemented: skip the request from now on */
		fc->no_fsyncdir = 1;
		ret = 0;
	}

	inode_unlock(inode);

	return ret;
}

1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305
/*
 * ->unlocked_ioctl() for directories.
 *
 * Returns -ENOTTY when the connection's minor version is below 18,
 * otherwise the result of fuse_ioctl_common() with FUSE_IOCTL_DIR.
 */
static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
			    unsigned long arg)
{
	struct inode *inode = file->f_mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);

	/* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
	if (fc->minor >= 18)
		return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);

	return -ENOTTY;
}

/*
 * ->compat_ioctl() for directories.
 *
 * Same version gate as fuse_dir_ioctl(): -ENOTTY when the connection's
 * minor version is below 18, otherwise fuse_ioctl_common() is called
 * with both FUSE_IOCTL_COMPAT and FUSE_IOCTL_DIR set.
 */
static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
				   unsigned long arg)
{
	struct inode *inode = file->f_mapping->host;
	struct fuse_conn *fc = get_fuse_conn(inode);
	unsigned int flags = FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR;

	if (fc->minor >= 18)
		return fuse_ioctl_common(file, cmd, arg, flags);

	return -ENOTTY;
}

M
Maxim Patlasov 已提交
1306
static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
M
Miklos Szeredi 已提交
1307 1308 1309 1310 1311
{
	/* Always update if mtime is explicitly set  */
	if (ivalid & ATTR_MTIME_SET)
		return true;

M
Maxim Patlasov 已提交
1312 1313 1314 1315
	/* Or if kernel i_mtime is the official one */
	if (trust_local_mtime)
		return true;

M
Miklos Szeredi 已提交
1316 1317 1318 1319 1320 1321 1322 1323
	/* If it's an open(O_TRUNC) or an ftruncate(), don't update */
	if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE)))
		return false;

	/* In all other cases update */
	return true;
}

1324 1325
/*
 * Translate the attribute change described by @iattr into the
 * FUSE_SETATTR argument @arg.
 *
 * Only bits and values are added to @arg; nothing is cleared here, so
 * the caller has to pass in a zeroed structure.  uid and gid are
 * converted with the connection's user namespace.
 *
 * @trust_local_cmtime: when true, mtime is always sent as an explicit
 * value (never FATTR_MTIME_NOW) and ctime is sent as well; when false
 * ctime is never sent.
 */
static void iattr_to_fattr(struct fuse_conn *fc, struct iattr *iattr,
			   struct fuse_setattr_in *arg, bool trust_local_cmtime)
{
	unsigned ivalid = iattr->ia_valid;

	if (ivalid & ATTR_MODE) {
		arg->valid |= FATTR_MODE;
		arg->mode = iattr->ia_mode;
	}
	if (ivalid & ATTR_UID) {
		arg->valid |= FATTR_UID;
		arg->uid = from_kuid(fc->user_ns, iattr->ia_uid);
	}
	if (ivalid & ATTR_GID) {
		arg->valid |= FATTR_GID;
		arg->gid = from_kgid(fc->user_ns, iattr->ia_gid);
	}
	if (ivalid & ATTR_SIZE) {
		arg->valid |= FATTR_SIZE;
		arg->size = iattr->ia_size;
	}
	if (ivalid & ATTR_ATIME) {
		arg->valid |= FATTR_ATIME;
		arg->atime = iattr->ia_atime.tv_sec;
		arg->atimensec = iattr->ia_atime.tv_nsec;
		/* No explicit time given: flag the value as "now" */
		if (!(ivalid & ATTR_ATIME_SET))
			arg->valid |= FATTR_ATIME_NOW;
	}
	if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) {
		arg->valid |= FATTR_MTIME;
		arg->mtime = iattr->ia_mtime.tv_sec;
		arg->mtimensec = iattr->ia_mtime.tv_nsec;
		if (!(ivalid & ATTR_MTIME_SET) && !trust_local_cmtime)
			arg->valid |= FATTR_MTIME_NOW;
	}
	if ((ivalid & ATTR_CTIME) && trust_local_cmtime) {
		arg->valid |= FATTR_CTIME;
		arg->ctime = iattr->ia_ctime.tv_sec;
		arg->ctimensec = iattr->ia_ctime.tv_nsec;
	}
}

M
Miklos Szeredi 已提交
1358 1359 1360 1361 1362 1363 1364 1365 1366 1367
/*
 * Prevent concurrent writepages on inode
 *
 * A negative bias (FUSE_NOWRITE) is added to the inode's write counter
 * under fi->lock; then the caller sleeps until all pending writes have
 * finished, i.e. until the counter equals the bias alone.
 *
 * The inode must be locked by the caller, and the bias must not
 * already be in place; both conditions are enforced with BUG_ON().
 */
void fuse_set_nowrite(struct inode *inode)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	BUG_ON(!inode_is_locked(inode));

	spin_lock(&fi->lock);
	BUG_ON(fi->writectr < 0);
	fi->writectr += FUSE_NOWRITE;
	spin_unlock(&fi->lock);

	wait_event(fi->page_waitq, fi->writectr == FUSE_NOWRITE);
}

/*
 * Allow writepages on inode
 *
 * Resets the write counter from FUSE_NOWRITE back to zero, which
 * removes the bias, and sends any queued writepages.  The counter must
 * be exactly FUSE_NOWRITE on entry.
 *
 * The callers in this file invoke this with fi->lock held.
 */
static void __fuse_release_nowrite(struct inode *inode)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	BUG_ON(fi->writectr != FUSE_NOWRITE);

	fi->writectr = 0;
	fuse_flush_writepages(inode);
}

void fuse_release_nowrite(struct inode *inode)
{
1394
	struct fuse_inode *fi = get_fuse_inode(inode);
M
Miklos Szeredi 已提交
1395

1396
	spin_lock(&fi->lock);
M
Miklos Szeredi 已提交
1397
	__fuse_release_nowrite(inode);
1398
	spin_unlock(&fi->lock);
M
Miklos Szeredi 已提交
1399 1400
}

1401
static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
M
Maxim Patlasov 已提交
1402 1403 1404 1405
			      struct inode *inode,
			      struct fuse_setattr_in *inarg_p,
			      struct fuse_attr_out *outarg_p)
{
1406 1407 1408 1409 1410 1411 1412 1413
	args->opcode = FUSE_SETATTR;
	args->nodeid = get_node_id(inode);
	args->in_numargs = 1;
	args->in_args[0].size = sizeof(*inarg_p);
	args->in_args[0].value = inarg_p;
	args->out_numargs = 1;
	args->out_args[0].size = sizeof(*outarg_p);
	args->out_args[0].value = outarg_p;
M
Maxim Patlasov 已提交
1414 1415 1416 1417 1418
}

/*
 * Flush inode->i_mtime to the server
 */
1419
int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
M
Maxim Patlasov 已提交
1420 1421
{
	struct fuse_conn *fc = get_fuse_conn(inode);
1422
	FUSE_ARGS(args);
M
Maxim Patlasov 已提交
1423 1424 1425 1426 1427 1428
	struct fuse_setattr_in inarg;
	struct fuse_attr_out outarg;

	memset(&inarg, 0, sizeof(inarg));
	memset(&outarg, 0, sizeof(outarg));

1429
	inarg.valid = FATTR_MTIME;
M
Maxim Patlasov 已提交
1430 1431
	inarg.mtime = inode->i_mtime.tv_sec;
	inarg.mtimensec = inode->i_mtime.tv_nsec;
1432 1433 1434 1435 1436
	if (fc->minor >= 23) {
		inarg.valid |= FATTR_CTIME;
		inarg.ctime = inode->i_ctime.tv_sec;
		inarg.ctimensec = inode->i_ctime.tv_nsec;
	}
M
Miklos Szeredi 已提交
1437 1438 1439 1440
	if (ff) {
		inarg.valid |= FATTR_FH;
		inarg.fh = ff->fh;
	}
1441
	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
M
Maxim Patlasov 已提交
1442

1443
	return fuse_simple_request(fc, &args);
M
Maxim Patlasov 已提交
1444 1445
}

1446 1447 1448 1449 1450
/*
 * Set attributes, and at the same time refresh them.
 *
 * Truncation is slightly complicated, because the 'truncate' request
 * may fail, in which case we don't want to touch the mapping.
M
Miklos Szeredi 已提交
1451 1452
 * vmtruncate() doesn't allow for this case, so do the rlimit checking
 * and the actual truncation by hand.
1453
 */
1454
int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
1455
		    struct file *file)
1456
{
1457
	struct inode *inode = d_inode(dentry);
1458
	struct fuse_conn *fc = get_fuse_conn(inode);
1459
	struct fuse_inode *fi = get_fuse_inode(inode);
1460
	FUSE_ARGS(args);
1461 1462
	struct fuse_setattr_in inarg;
	struct fuse_attr_out outarg;
M
Miklos Szeredi 已提交
1463
	bool is_truncate = false;
P
Pavel Emelyanov 已提交
1464
	bool is_wb = fc->writeback_cache;
M
Miklos Szeredi 已提交
1465
	loff_t oldsize;
1466
	int err;
1467
	bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode);
1468

M
Miklos Szeredi 已提交
1469
	if (!fc->default_permissions)
1470 1471
		attr->ia_valid |= ATTR_FORCE;

1472
	err = setattr_prepare(dentry, attr);
1473 1474
	if (err)
		return err;
M
Miklos Szeredi 已提交
1475

M
Miklos Szeredi 已提交
1476
	if (attr->ia_valid & ATTR_OPEN) {
1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487
		/* This is coming from open(..., ... | O_TRUNC); */
		WARN_ON(!(attr->ia_valid & ATTR_SIZE));
		WARN_ON(attr->ia_size != 0);
		if (fc->atomic_o_trunc) {
			/*
			 * No need to send request to userspace, since actual
			 * truncation has already been done by OPEN.  But still
			 * need to truncate page cache.
			 */
			i_size_write(inode, 0);
			truncate_pagecache(inode, 0);
M
Miklos Szeredi 已提交
1488
			return 0;
1489
		}
M
Miklos Szeredi 已提交
1490 1491
		file = NULL;
	}
1492

1493 1494 1495
	if (attr->ia_valid & ATTR_SIZE) {
		if (WARN_ON(!S_ISREG(inode->i_mode)))
			return -EIO;
M
Miklos Szeredi 已提交
1496
		is_truncate = true;
1497
	}
1498

1499
	if (is_truncate) {
M
Miklos Szeredi 已提交
1500
		fuse_set_nowrite(inode);
1501
		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1502 1503
		if (trust_local_cmtime && attr->ia_size != inode->i_size)
			attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
1504
	}
M
Miklos Szeredi 已提交
1505

1506
	memset(&inarg, 0, sizeof(inarg));
1507
	memset(&outarg, 0, sizeof(outarg));
1508
	iattr_to_fattr(fc, attr, &inarg, trust_local_cmtime);
1509 1510 1511 1512 1513
	if (file) {
		struct fuse_file *ff = file->private_data;
		inarg.valid |= FATTR_FH;
		inarg.fh = ff->fh;
	}
1514 1515 1516 1517 1518
	if (attr->ia_valid & ATTR_SIZE) {
		/* For mandatory locking in truncate */
		inarg.valid |= FATTR_LOCKOWNER;
		inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
	}
1519 1520
	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
	err = fuse_simple_request(fc, &args);
1521 1522 1523
	if (err) {
		if (err == -EINTR)
			fuse_invalidate_attr(inode);
M
Miklos Szeredi 已提交
1524
		goto error;
1525
	}
1526

1527 1528
	if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
		make_bad_inode(inode);
M
Miklos Szeredi 已提交
1529 1530 1531 1532
		err = -EIO;
		goto error;
	}

1533
	spin_lock(&fi->lock);
M
Maxim Patlasov 已提交
1534
	/* the kernel maintains i_mtime locally */
1535 1536 1537 1538 1539
	if (trust_local_cmtime) {
		if (attr->ia_valid & ATTR_MTIME)
			inode->i_mtime = attr->ia_mtime;
		if (attr->ia_valid & ATTR_CTIME)
			inode->i_ctime = attr->ia_ctime;
M
Miklos Szeredi 已提交
1540
		/* FIXME: clear I_DIRTY_SYNC? */
M
Maxim Patlasov 已提交
1541 1542
	}

M
Miklos Szeredi 已提交
1543 1544 1545
	fuse_change_attributes_common(inode, &outarg.attr,
				      attr_timeout(&outarg));
	oldsize = inode->i_size;
P
Pavel Emelyanov 已提交
1546 1547 1548
	/* see the comment in fuse_change_attributes() */
	if (!is_wb || is_truncate || !S_ISREG(inode->i_mode))
		i_size_write(inode, outarg.attr.size);
M
Miklos Szeredi 已提交
1549 1550

	if (is_truncate) {
1551
		/* NOTE: this may release/reacquire fi->lock */
M
Miklos Szeredi 已提交
1552 1553
		__fuse_release_nowrite(inode);
	}
1554
	spin_unlock(&fi->lock);
M
Miklos Szeredi 已提交
1555 1556 1557 1558 1559

	/*
	 * Only call invalidate_inode_pages2() after removing
	 * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
	 */
P
Pavel Emelyanov 已提交
1560 1561
	if ((is_truncate || !is_wb) &&
	    S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
1562
		truncate_pagecache(inode, outarg.attr.size);
M
Miklos Szeredi 已提交
1563
		invalidate_inode_pages2(inode->i_mapping);
1564 1565
	}

1566
	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1567
	return 0;
M
Miklos Szeredi 已提交
1568 1569 1570 1571 1572

error:
	if (is_truncate)
		fuse_release_nowrite(inode);

1573
	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
M
Miklos Szeredi 已提交
1574
	return err;
1575 1576
}

1577 1578
static int fuse_setattr(struct dentry *entry, struct iattr *attr)
{
1579
	struct inode *inode = d_inode(entry);
1580
	struct fuse_conn *fc = get_fuse_conn(inode);
1581
	struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL;
1582
	int ret;
1583 1584 1585 1586

	if (!fuse_allow_current_process(get_fuse_conn(inode)))
		return -EACCES;

1587 1588 1589
	if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) {
		attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID |
				    ATTR_MODE);
1590

1591
		/*
1592 1593 1594 1595
		 * The only sane way to reliably kill suid/sgid is to do it in
		 * the userspace filesystem
		 *
		 * This should be done on write(), truncate() and chown().
1596
		 */
1597 1598 1599 1600 1601 1602 1603 1604 1605 1606
		if (!fc->handle_killpriv) {
			/*
			 * ia_mode calculation may have used stale i_mode.
			 * Refresh and recalculate.
			 */
			ret = fuse_do_getattr(inode, NULL, file);
			if (ret)
				return ret;

			attr->ia_mode = inode->i_mode;
1607
			if (inode->i_mode & S_ISUID) {
1608 1609 1610
				attr->ia_valid |= ATTR_MODE;
				attr->ia_mode &= ~S_ISUID;
			}
1611
			if ((inode->i_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
1612 1613 1614
				attr->ia_valid |= ATTR_MODE;
				attr->ia_mode &= ~S_ISGID;
			}
1615 1616 1617 1618
		}
	}
	if (!attr->ia_valid)
		return 0;
1619

1620
	ret = fuse_do_setattr(entry, attr, file);
1621
	if (!ret) {
S
Seth Forshee 已提交
1622 1623 1624 1625 1626 1627 1628
		/*
		 * If filesystem supports acls it may have updated acl xattrs in
		 * the filesystem, so forget cached acls for the inode.
		 */
		if (fc->posix_acl)
			forget_all_cached_acls(inode);

1629 1630 1631 1632 1633
		/* Directory mode changed, may need to revalidate access */
		if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE))
			fuse_invalidate_entry_cache(entry);
	}
	return ret;
1634 1635
}

1636 1637
/*
 * ->getattr() for FUSE inodes.
 *
 * Returns -EACCES if the current process may not call into the
 * filesystem, otherwise the result of fuse_update_get_attr() for the
 * inode behind @path.
 */
static int fuse_getattr(const struct path *path, struct kstat *stat,
			u32 request_mask, unsigned int flags)
{
	struct inode *inode = d_inode(path->dentry);

	if (!fuse_allow_current_process(get_fuse_conn(inode)))
		return -EACCES;

	return fuse_update_get_attr(inode, NULL, stat, request_mask, flags);
}

1648
static const struct inode_operations fuse_dir_inode_operations = {
1649
	.lookup		= fuse_lookup,
1650 1651 1652 1653
	.mkdir		= fuse_mkdir,
	.symlink	= fuse_symlink,
	.unlink		= fuse_unlink,
	.rmdir		= fuse_rmdir,
1654
	.rename		= fuse_rename2,
1655 1656 1657
	.link		= fuse_link,
	.setattr	= fuse_setattr,
	.create		= fuse_create,
1658
	.atomic_open	= fuse_atomic_open,
1659
	.mknod		= fuse_mknod,
1660 1661
	.permission	= fuse_permission,
	.getattr	= fuse_getattr,
1662
	.listxattr	= fuse_listxattr,
S
Seth Forshee 已提交
1663 1664
	.get_acl	= fuse_get_acl,
	.set_acl	= fuse_set_acl,
1665 1666
};

1667
static const struct file_operations fuse_dir_operations = {
M
Miklos Szeredi 已提交
1668
	.llseek		= generic_file_llseek,
1669
	.read		= generic_read_dir,
A
Al Viro 已提交
1670
	.iterate_shared	= fuse_readdir,
1671 1672
	.open		= fuse_dir_open,
	.release	= fuse_dir_release,
1673
	.fsync		= fuse_dir_fsync,
1674 1675
	.unlocked_ioctl	= fuse_dir_ioctl,
	.compat_ioctl	= fuse_dir_compat_ioctl,
1676 1677
};

1678
static const struct inode_operations fuse_common_inode_operations = {
1679
	.setattr	= fuse_setattr,
1680 1681
	.permission	= fuse_permission,
	.getattr	= fuse_getattr,
1682
	.listxattr	= fuse_listxattr,
S
Seth Forshee 已提交
1683 1684
	.get_acl	= fuse_get_acl,
	.set_acl	= fuse_set_acl,
1685 1686
};

1687
static const struct inode_operations fuse_symlink_inode_operations = {
1688
	.setattr	= fuse_setattr,
1689
	.get_link	= fuse_get_link,
1690
	.getattr	= fuse_getattr,
1691
	.listxattr	= fuse_listxattr,
1692 1693 1694 1695 1696 1697 1698 1699 1700
};

/* Install the common inode operations table on @inode */
void fuse_init_common(struct inode *inode)
{
	inode->i_op = &fuse_common_inode_operations;
}

void fuse_init_dir(struct inode *inode)
{
1701 1702
	struct fuse_inode *fi = get_fuse_inode(inode);

1703 1704
	inode->i_op = &fuse_dir_inode_operations;
	inode->i_fop = &fuse_dir_operations;
1705 1706 1707 1708 1709 1710

	spin_lock_init(&fi->rdc.lock);
	fi->rdc.cached = false;
	fi->rdc.size = 0;
	fi->rdc.pos = 0;
	fi->rdc.version = 0;
1711 1712
}

D
Dan Schatzberg 已提交
1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728
/*
 * ->readpage() for symlinks: fill @page with the link target via
 * fuse_readlink_page().  The page is marked up to date only on
 * success and is unlocked in either case.  The file argument is unused.
 *
 * Returns the result of fuse_readlink_page().
 */
static int fuse_symlink_readpage(struct file *null, struct page *page)
{
	struct inode *inode = page->mapping->host;
	int err;

	err = fuse_readlink_page(inode, page);
	if (err == 0)
		SetPageUptodate(page);

	unlock_page(page);

	return err;
}

/* Address space operations for symlinks; only ->readpage is provided */
static const struct address_space_operations fuse_symlink_aops = {
	.readpage	= fuse_symlink_readpage,
};

1729 1730 1731
void fuse_init_symlink(struct inode *inode)
{
	inode->i_op = &fuse_symlink_inode_operations;
D
Dan Schatzberg 已提交
1732 1733
	inode->i_data.a_ops = &fuse_symlink_aops;
	inode_nohighmem(inode);
1734
}