/*
  FUSE: Filesystem in Userspace
  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/

#include "fuse_i.h"

#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/sched.h>
#include <linux/namei.h>
15
#include <linux/slab.h>
S
Seth Forshee 已提交
16
#include <linux/xattr.h>
17
#include <linux/iversion.h>
S
Seth Forshee 已提交
18
#include <linux/posix_acl.h>
19

20 21 22 23 24 25 26
static void fuse_advise_use_readdirplus(struct inode *dir)
{
	struct fuse_inode *fi = get_fuse_inode(dir);

	set_bit(FUSE_I_ADVISE_RDPLUS, &fi->state);
}

27 28 29 30 31 32 33 34 35 36 37 38
#if BITS_PER_LONG >= 64
static inline void __fuse_dentry_settime(struct dentry *entry, u64 time)
{
	entry->d_fsdata = (void *) time;
}

static inline u64 fuse_dentry_time(const struct dentry *entry)
{
	return (u64)entry->d_fsdata;
}

#else
M
Miklos Szeredi 已提交
39 40 41 42 43
union fuse_dentry {
	u64 time;
	struct rcu_head rcu;
};

44 45 46 47 48 49 50 51 52 53 54
static inline void __fuse_dentry_settime(struct dentry *dentry, u64 time)
{
	((union fuse_dentry *) dentry->d_fsdata)->time = time;
}

static inline u64 fuse_dentry_time(const struct dentry *entry)
{
	return ((union fuse_dentry *) entry->d_fsdata)->time;
}
#endif

55
/*
 * Store a new timeout in @dentry and keep DCACHE_OP_DELETE in sync with it.
 *
 * The flag is wanted only when @time is zero and the connection has
 * delete_stale set; in every other case it is cleared.
 */
static void fuse_dentry_settime(struct dentry *dentry, u64 time)
{
	struct fuse_conn *fc = get_fuse_conn_super(dentry->d_sb);
	bool delete = !time && fc->delete_stale;
	/*
	 * Mess with DCACHE_OP_DELETE because dput() will be faster without it.
	 * Don't care about races, either way it's just an optimization
	 */
	if ((!delete && (dentry->d_flags & DCACHE_OP_DELETE)) ||
	    (delete && !(dentry->d_flags & DCACHE_OP_DELETE))) {
		/* Only take d_lock when the flag actually has to change */
		spin_lock(&dentry->d_lock);
		if (!delete)
			dentry->d_flags &= ~DCACHE_OP_DELETE;
		else
			dentry->d_flags |= DCACHE_OP_DELETE;
		spin_unlock(&dentry->d_lock);
	}

	__fuse_dentry_settime(dentry, time);
}

76 77 78
/*
 * FUSE caches dentries and attributes with separate timeouts.  The
 * time in jiffies until the dentry/attributes are valid is stored in
 * dentry->d_fsdata and fuse_inode->i_time respectively.
 */

/*
 * Calculate the time in jiffies until a dentry/attributes are valid
 */
M
Miklos Szeredi 已提交
85
static u64 time_to_jiffies(u64 sec, u32 nsec)
86
{
M
Miklos Szeredi 已提交
87
	if (sec || nsec) {
M
Miklos Szeredi 已提交
88 89
		struct timespec64 ts = {
			sec,
90
			min_t(u32, nsec, NSEC_PER_SEC - 1)
M
Miklos Szeredi 已提交
91 92 93
		};

		return get_jiffies_64() + timespec64_to_jiffies(&ts);
M
Miklos Szeredi 已提交
94
	} else
M
Miklos Szeredi 已提交
95
		return 0;
96 97
}

98 99 100 101
/*
 * Set dentry and possibly attribute timeouts from the lookup/mk*
 * replies
 */
M
Miklos Szeredi 已提交
102
void fuse_change_entry_timeout(struct dentry *entry, struct fuse_entry_out *o)
103
{
M
Miklos Szeredi 已提交
104 105
	fuse_dentry_settime(entry,
		time_to_jiffies(o->entry_valid, o->entry_valid_nsec));
106 107 108 109 110 111 112
}

/*
 * Convert the attribute validity period carried in a fuse_attr_out reply
 * into the jiffies value produced by time_to_jiffies().
 */
static u64 attr_timeout(struct fuse_attr_out *o)
{
	u64 expiry = time_to_jiffies(o->attr_valid, o->attr_valid_nsec);

	return expiry;
}

M
Miklos Szeredi 已提交
113
u64 entry_attr_timeout(struct fuse_entry_out *o)
114 115
{
	return time_to_jiffies(o->attr_valid, o->attr_valid_nsec);
116 117
}

118 119 120 121 122
/* Add the bits in @mask to the inval_mask of the fuse inode behind @inode. */
static void fuse_invalidate_attr_mask(struct inode *inode, u32 mask)
{
	struct fuse_inode *fi = get_fuse_inode(inode);

	set_mask_bits(&fi->inval_mask, 0, mask);
}

123 124 125 126
/*
 * Mark the attributes as stale, so that at the next call to
 * ->getattr() they will be fetched from userspace
 */
127 128
void fuse_invalidate_attr(struct inode *inode)
{
129
	fuse_invalidate_attr_mask(inode, STATX_BASIC_STATS);
130 131
}

132 133 134 135 136 137
static void fuse_dir_changed(struct inode *dir)
{
	fuse_invalidate_attr(dir);
	inode_maybe_inc_iversion(dir, false);
}

138 139 140 141 142 143 144
/**
 * Mark the attributes as stale due to an atime change.  Avoid the invalidate if
 * atime is not used.
 */
void fuse_invalidate_atime(struct inode *inode)
{
	if (!IS_RDONLY(inode))
145
		fuse_invalidate_attr_mask(inode, STATX_ATIME);
146 147
}

148 149 150 151 152 153 154 155
/*
 * Just mark the entry as stale, so that a next attempt to look it up
 * will result in a new lookup call to userspace
 *
 * This is called when a dentry is about to become negative and the
 * timeout is unknown (unlink, rmdir, rename and in some cases
 * lookup)
 *
 * Implemented by setting the dentry timeout to 0.
 */
void fuse_invalidate_entry_cache(struct dentry *entry)
{
	fuse_dentry_settime(entry, 0);
}

161 162 163 164
/*
 * Same as fuse_invalidate_entry_cache(), but also try to remove the
 * dentry from the hash (via d_invalidate(), which is called first).
 */
static void fuse_invalidate_entry(struct dentry *entry)
{
	d_invalidate(entry);
	fuse_invalidate_entry_cache(entry);
}

171
static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
A
Al Viro 已提交
172
			     u64 nodeid, const struct qstr *name,
173 174
			     struct fuse_entry_out *outarg)
{
175
	memset(outarg, 0, sizeof(struct fuse_entry_out));
176 177 178 179 180 181 182 183
	args->opcode = FUSE_LOOKUP;
	args->nodeid = nodeid;
	args->in_numargs = 1;
	args->in_args[0].size = name->len + 1;
	args->in_args[0].value = name->name;
	args->out_numargs = 1;
	args->out_args[0].size = sizeof(struct fuse_entry_out);
	args->out_args[0].value = outarg;
184 185
}

186 187 188 189 190 191 192 193 194
/*
 * Check whether the dentry is still valid
 *
 * If the entry validity timeout has expired and the dentry is
 * positive, try to redo the lookup.  If the lookup results in a
 * different inode, then let the VFS invalidate the dentry and redo
 * the lookup once more.  If the lookup results in the same inode,
 * then refresh the attributes, timeouts and mark the dentry valid.
 */
195
static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
196
{
197
	struct inode *inode;
198 199
	struct dentry *parent;
	struct fuse_conn *fc;
M
Miklos Szeredi 已提交
200
	struct fuse_inode *fi;
201
	int ret;
202

203
	inode = d_inode_rcu(entry);
204
	if (inode && is_bad_inode(inode))
205
		goto invalid;
206 207
	else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
		 (flags & LOOKUP_REVAL)) {
208
		struct fuse_entry_out outarg;
209
		FUSE_ARGS(args);
210
		struct fuse_forget_link *forget;
211
		u64 attr_version;
212

213
		/* For negative dentries, always do a fresh lookup */
214
		if (!inode)
215
			goto invalid;
216

217
		ret = -ECHILD;
218
		if (flags & LOOKUP_RCU)
219
			goto out;
220

221
		fc = get_fuse_conn(inode);
222

223
		forget = fuse_alloc_forget();
224 225
		ret = -ENOMEM;
		if (!forget)
226
			goto out;
M
Miklos Szeredi 已提交
227

228
		attr_version = fuse_get_attr_version(fc);
229

230
		parent = dget_parent(entry);
231
		fuse_lookup_init(fc, &args, get_node_id(d_inode(parent)),
232
				 &entry->d_name, &outarg);
233
		ret = fuse_simple_request(fc, &args);
234
		dput(parent);
235
		/* Zero nodeid is same as -ENOENT */
236 237 238
		if (!ret && !outarg.nodeid)
			ret = -ENOENT;
		if (!ret) {
M
Miklos Szeredi 已提交
239
			fi = get_fuse_inode(inode);
240
			if (outarg.nodeid != get_node_id(inode)) {
241
				fuse_queue_forget(fc, forget, outarg.nodeid, 1);
242
				goto invalid;
243
			}
244
			spin_lock(&fi->lock);
M
Miklos Szeredi 已提交
245
			fi->nlookup++;
246
			spin_unlock(&fi->lock);
247
		}
248
		kfree(forget);
249 250 251
		if (ret == -ENOMEM)
			goto out;
		if (ret || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
252
			goto invalid;
253

S
Seth Forshee 已提交
254
		forget_all_cached_acls(inode);
255 256 257 258
		fuse_change_attributes(inode, &outarg.attr,
				       entry_attr_timeout(&outarg),
				       attr_version);
		fuse_change_entry_timeout(entry, &outarg);
259
	} else if (inode) {
M
Miklos Szeredi 已提交
260 261 262 263 264
		fi = get_fuse_inode(inode);
		if (flags & LOOKUP_RCU) {
			if (test_bit(FUSE_I_INIT_RDPLUS, &fi->state))
				return -ECHILD;
		} else if (test_and_clear_bit(FUSE_I_INIT_RDPLUS, &fi->state)) {
265
			parent = dget_parent(entry);
266
			fuse_advise_use_readdirplus(d_inode(parent));
267 268
			dput(parent);
		}
269
	}
270 271 272 273 274 275 276
	ret = 1;
out:
	return ret;

invalid:
	ret = 0;
	goto out;
277 278
}

279
#if BITS_PER_LONG < 64
M
Miklos Szeredi 已提交
280 281
static int fuse_dentry_init(struct dentry *dentry)
{
282 283
	dentry->d_fsdata = kzalloc(sizeof(union fuse_dentry),
				   GFP_KERNEL_ACCOUNT | __GFP_RECLAIMABLE);
M
Miklos Szeredi 已提交
284 285 286 287 288 289 290 291 292

	return dentry->d_fsdata ? 0 : -ENOMEM;
}
static void fuse_dentry_release(struct dentry *dentry)
{
	union fuse_dentry *fd = dentry->d_fsdata;

	kfree_rcu(fd, rcu);
}
293
#endif
M
Miklos Szeredi 已提交
294

295 296 297 298 299
static int fuse_dentry_delete(const struct dentry *dentry)
{
	return time_before64(fuse_dentry_time(dentry), get_jiffies_64());
}

A
Al Viro 已提交
300
const struct dentry_operations fuse_dentry_operations = {
301
	.d_revalidate	= fuse_dentry_revalidate,
302
	.d_delete	= fuse_dentry_delete,
303
#if BITS_PER_LONG < 64
M
Miklos Szeredi 已提交
304 305
	.d_init		= fuse_dentry_init,
	.d_release	= fuse_dentry_release,
306
#endif
307 308
};

309
const struct dentry_operations fuse_root_dentry_operations = {
310
#if BITS_PER_LONG < 64
311 312
	.d_init		= fuse_dentry_init,
	.d_release	= fuse_dentry_release,
313
#endif
314 315
};

316
/*
 * Return non-zero if the file type bits of mode @m denote one of:
 * regular file, directory, symlink, character device, block device,
 * FIFO or socket.  Any other type yields 0.
 */
int fuse_valid_type(int m)
{
	return S_ISREG(m) || S_ISDIR(m) || S_ISLNK(m) || S_ISCHR(m) ||
		S_ISBLK(m) || S_ISFIFO(m) || S_ISSOCK(m);
}

A
Al Viro 已提交
322
/*
 * Send a FUSE_LOOKUP for @name under node @nodeid and instantiate the
 * resulting inode.
 *
 * @outarg receives the reply.  *@inode is set to NULL first and is only
 * replaced when an inode was obtained from fuse_iget().
 *
 * Returns 0 on success.  A reply with a zero nodeid also returns 0 but
 * leaves *@inode NULL.  Errors: -ENAMETOOLONG if the name exceeds
 * FUSE_NAME_MAX, -ENOMEM on allocation failure, -EIO if the returned
 * file type is not accepted by fuse_valid_type(), or the error from the
 * request itself.
 */
int fuse_lookup_name(struct super_block *sb, u64 nodeid, const struct qstr *name,
		     struct fuse_entry_out *outarg, struct inode **inode)
{
	struct fuse_conn *fc = get_fuse_conn_super(sb);
	FUSE_ARGS(args);
	struct fuse_forget_link *forget;
	u64 attr_version;
	int err;

	*inode = NULL;
	err = -ENAMETOOLONG;
	if (name->len > FUSE_NAME_MAX)
		goto out;


	forget = fuse_alloc_forget();
	err = -ENOMEM;
	if (!forget)
		goto out;

	attr_version = fuse_get_attr_version(fc);

	fuse_lookup_init(fc, &args, nodeid, name, outarg);
	err = fuse_simple_request(fc, &args);
	/* Zero nodeid is same as -ENOENT, but with valid timeout */
	if (err || !outarg->nodeid)
		goto out_put_forget;

	/* nodeid is known to be non-zero from here on */
	err = -EIO;
	if (!fuse_valid_type(outarg->attr.mode))
		goto out_put_forget;

	*inode = fuse_iget(sb, outarg->nodeid, outarg->generation,
			   &outarg->attr, entry_attr_timeout(outarg),
			   attr_version);
	err = -ENOMEM;
	if (!*inode) {
		/* The forget link is handed over here, so it is not freed */
		fuse_queue_forget(fc, forget, outarg->nodeid, 1);
		goto out;
	}
	err = 0;

 out_put_forget:
	kfree(forget);
 out:
	return err;
}

static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
A
Al Viro 已提交
373
				  unsigned int flags)
374 375 376 377 378 379
{
	int err;
	struct fuse_entry_out outarg;
	struct inode *inode;
	struct dentry *newent;
	bool outarg_valid = true;
380
	bool locked;
381

382
	locked = fuse_lock_inode(dir);
383 384
	err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
			       &outarg, &inode);
385
	fuse_unlock_inode(dir, locked);
386 387 388 389 390 391 392 393 394 395
	if (err == -ENOENT) {
		outarg_valid = false;
		err = 0;
	}
	if (err)
		goto out_err;

	err = -EIO;
	if (inode && get_node_id(inode) == FUSE_ROOT_ID)
		goto out_iput;
396

397
	newent = d_splice_alias(inode, entry);
398 399 400
	err = PTR_ERR(newent);
	if (IS_ERR(newent))
		goto out_err;
401

402
	entry = newent ? newent : entry;
403
	if (outarg_valid)
404
		fuse_change_entry_timeout(entry, &outarg);
405 406
	else
		fuse_invalidate_entry_cache(entry);
407

408 409
	if (inode)
		fuse_advise_use_readdirplus(dir);
410
	return newent;
411 412 413 414 415

 out_iput:
	iput(inode);
 out_err:
	return ERR_PTR(err);
416 417
}

418 419 420 421 422 423
/*
 * Atomic create+open operation
 *
 * If the filesystem doesn't support this, then fall back to separate
 * 'mknod' + 'open' requests.
 */
A
Al Viro 已提交
424
static int fuse_create_open(struct inode *dir, struct dentry *entry,
A
Al Viro 已提交
425
			    struct file *file, unsigned flags,
426
			    umode_t mode)
M
Miklos Szeredi 已提交
427 428 429 430
{
	int err;
	struct inode *inode;
	struct fuse_conn *fc = get_fuse_conn(dir);
431
	FUSE_ARGS(args);
432
	struct fuse_forget_link *forget;
433
	struct fuse_create_in inarg;
M
Miklos Szeredi 已提交
434 435
	struct fuse_open_out outopen;
	struct fuse_entry_out outentry;
436
	struct fuse_inode *fi;
M
Miklos Szeredi 已提交
437 438
	struct fuse_file *ff;

439 440 441
	/* Userspace expects S_IFREG in create mode */
	BUG_ON((mode & S_IFMT) != S_IFREG);

442
	forget = fuse_alloc_forget();
443
	err = -ENOMEM;
444
	if (!forget)
445
		goto out_err;
446

447
	err = -ENOMEM;
T
Tejun Heo 已提交
448
	ff = fuse_file_alloc(fc);
M
Miklos Szeredi 已提交
449
	if (!ff)
450
		goto out_put_forget_req;
M
Miklos Szeredi 已提交
451

452 453 454
	if (!fc->dont_mask)
		mode &= ~current_umask();

M
Miklos Szeredi 已提交
455 456
	flags &= ~O_NOCTTY;
	memset(&inarg, 0, sizeof(inarg));
457
	memset(&outentry, 0, sizeof(outentry));
M
Miklos Szeredi 已提交
458 459
	inarg.flags = flags;
	inarg.mode = mode;
460
	inarg.umask = current_umask();
461 462 463 464 465 466 467 468 469 470 471 472
	args.opcode = FUSE_CREATE;
	args.nodeid = get_node_id(dir);
	args.in_numargs = 2;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.in_args[1].size = entry->d_name.len + 1;
	args.in_args[1].value = entry->d_name.name;
	args.out_numargs = 2;
	args.out_args[0].size = sizeof(outentry);
	args.out_args[0].value = &outentry;
	args.out_args[1].size = sizeof(outopen);
	args.out_args[1].value = &outopen;
473
	err = fuse_simple_request(fc, &args);
474
	if (err)
M
Miklos Szeredi 已提交
475 476 477
		goto out_free_ff;

	err = -EIO;
478
	if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid))
M
Miklos Szeredi 已提交
479 480
		goto out_free_ff;

481 482 483
	ff->fh = outopen.fh;
	ff->nodeid = outentry.nodeid;
	ff->open_flags = outopen.open_flags;
M
Miklos Szeredi 已提交
484
	inode = fuse_iget(dir->i_sb, outentry.nodeid, outentry.generation,
485
			  &outentry.attr, entry_attr_timeout(&outentry), 0);
M
Miklos Szeredi 已提交
486 487
	if (!inode) {
		flags &= ~(O_CREAT | O_EXCL | O_TRUNC);
488
		fuse_sync_release(NULL, ff, flags);
489
		fuse_queue_forget(fc, forget, outentry.nodeid, 1);
490 491
		err = -ENOMEM;
		goto out_err;
M
Miklos Szeredi 已提交
492
	}
493
	kfree(forget);
M
Miklos Szeredi 已提交
494
	d_instantiate(entry, inode);
495
	fuse_change_entry_timeout(entry, &outentry);
496
	fuse_dir_changed(dir);
497
	err = finish_open(file, entry, generic_file_open);
A
Al Viro 已提交
498
	if (err) {
499 500
		fi = get_fuse_inode(inode);
		fuse_sync_release(fi, ff, flags);
501
	} else {
502
		file->private_data = ff;
503
		fuse_finish_open(inode, file);
M
Miklos Szeredi 已提交
504
	}
A
Al Viro 已提交
505
	return err;
M
Miklos Szeredi 已提交
506

507
out_free_ff:
M
Miklos Szeredi 已提交
508
	fuse_file_free(ff);
509
out_put_forget_req:
510
	kfree(forget);
511
out_err:
A
Al Viro 已提交
512
	return err;
513 514 515
}

static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t);
A
Al Viro 已提交
516
static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
A
Al Viro 已提交
517
			    struct file *file, unsigned flags,
518
			    umode_t mode)
519 520 521 522 523
{
	int err;
	struct fuse_conn *fc = get_fuse_conn(dir);
	struct dentry *res = NULL;

524
	if (d_in_lookup(entry)) {
A
Al Viro 已提交
525
		res = fuse_lookup(dir, entry, 0);
526
		if (IS_ERR(res))
A
Al Viro 已提交
527
			return PTR_ERR(res);
528 529 530 531 532

		if (res)
			entry = res;
	}

533
	if (!(flags & O_CREAT) || d_really_is_positive(entry))
534 535 536
		goto no_open;

	/* Only creates */
537
	file->f_mode |= FMODE_CREATED;
538 539 540 541

	if (fc->no_create)
		goto mknod;

542
	err = fuse_create_open(dir, entry, file, flags, mode);
A
Al Viro 已提交
543
	if (err == -ENOSYS) {
544 545 546 547 548
		fc->no_create = 1;
		goto mknod;
	}
out_dput:
	dput(res);
A
Al Viro 已提交
549
	return err;
550 551 552

mknod:
	err = fuse_mknod(dir, entry, mode, 0);
A
Al Viro 已提交
553
	if (err)
554 555
		goto out_dput;
no_open:
A
Al Viro 已提交
556
	return finish_no_open(file, res);
M
Miklos Szeredi 已提交
557 558
}

559 560 561
/*
 * Code shared between mknod, mkdir, symlink and link
 */
562
static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
563
			    struct inode *dir, struct dentry *entry,
A
Al Viro 已提交
564
			    umode_t mode)
565 566 567
{
	struct fuse_entry_out outarg;
	struct inode *inode;
A
Al Viro 已提交
568
	struct dentry *d;
569
	int err;
570
	struct fuse_forget_link *forget;
M
Miklos Szeredi 已提交
571

572
	forget = fuse_alloc_forget();
573
	if (!forget)
574
		return -ENOMEM;
575

576
	memset(&outarg, 0, sizeof(outarg));
577 578 579 580
	args->nodeid = get_node_id(dir);
	args->out_numargs = 1;
	args->out_args[0].size = sizeof(outarg);
	args->out_args[0].value = &outarg;
581
	err = fuse_simple_request(fc, args);
M
Miklos Szeredi 已提交
582 583 584
	if (err)
		goto out_put_forget_req;

585 586
	err = -EIO;
	if (invalid_nodeid(outarg.nodeid))
M
Miklos Szeredi 已提交
587
		goto out_put_forget_req;
588 589

	if ((outarg.attr.mode ^ mode) & S_IFMT)
M
Miklos Szeredi 已提交
590
		goto out_put_forget_req;
591

592
	inode = fuse_iget(dir->i_sb, outarg.nodeid, outarg.generation,
593
			  &outarg.attr, entry_attr_timeout(&outarg), 0);
594
	if (!inode) {
595
		fuse_queue_forget(fc, forget, outarg.nodeid, 1);
596 597
		return -ENOMEM;
	}
598
	kfree(forget);
599

A
Al Viro 已提交
600 601 602 603
	d_drop(entry);
	d = d_splice_alias(inode, entry);
	if (IS_ERR(d))
		return PTR_ERR(d);
604

A
Al Viro 已提交
605 606 607 608 609 610
	if (d) {
		fuse_change_entry_timeout(d, &outarg);
		dput(d);
	} else {
		fuse_change_entry_timeout(entry, &outarg);
	}
611
	fuse_dir_changed(dir);
612
	return 0;
613

M
Miklos Szeredi 已提交
614
 out_put_forget_req:
615
	kfree(forget);
616
	return err;
617 618
}

A
Al Viro 已提交
619
static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
620 621 622 623
		      dev_t rdev)
{
	struct fuse_mknod_in inarg;
	struct fuse_conn *fc = get_fuse_conn(dir);
624
	FUSE_ARGS(args);
625

626 627 628
	if (!fc->dont_mask)
		mode &= ~current_umask();

629 630 631
	memset(&inarg, 0, sizeof(inarg));
	inarg.mode = mode;
	inarg.rdev = new_encode_dev(rdev);
632
	inarg.umask = current_umask();
633 634 635 636 637 638
	args.opcode = FUSE_MKNOD;
	args.in_numargs = 2;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.in_args[1].size = entry->d_name.len + 1;
	args.in_args[1].value = entry->d_name.name;
639
	return create_new_entry(fc, &args, dir, entry, mode);
640 641
}

A
Al Viro 已提交
642
static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
A
Al Viro 已提交
643
		       bool excl)
644 645 646 647
{
	return fuse_mknod(dir, entry, mode, 0);
}

648
/*
 * Create a directory named by @entry in @dir with a FUSE_MKDIR request.
 *
 * Unless the connection has dont_mask set, the current umask is applied
 * to @mode before sending.  The reply is required to describe a
 * directory (S_IFDIR is passed to create_new_entry()).
 */
static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
{
	struct fuse_mkdir_in inarg;
	struct fuse_conn *fc = get_fuse_conn(dir);
	FUSE_ARGS(args);

	if (!fc->dont_mask)
		mode &= ~current_umask();

	memset(&inarg, 0, sizeof(inarg));
	inarg.mode = mode;
	inarg.umask = current_umask();
	args.opcode = FUSE_MKDIR;
	args.in_numargs = 2;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.in_args[1].size = entry->d_name.len + 1;
	args.in_args[1].value = entry->d_name.name;
	return create_new_entry(fc, &args, dir, entry, S_IFDIR);
}

static int fuse_symlink(struct inode *dir, struct dentry *entry,
			const char *link)
{
	struct fuse_conn *fc = get_fuse_conn(dir);
	unsigned len = strlen(link) + 1;
674
	FUSE_ARGS(args);
675

676 677 678 679 680 681
	args.opcode = FUSE_SYMLINK;
	args.in_numargs = 2;
	args.in_args[0].size = entry->d_name.len + 1;
	args.in_args[0].value = entry->d_name.name;
	args.in_args[1].size = len;
	args.in_args[1].value = link;
682
	return create_new_entry(fc, &args, dir, entry, S_IFLNK);
683 684
}

S
Seth Forshee 已提交
685
void fuse_update_ctime(struct inode *inode)
M
Maxim Patlasov 已提交
686 687
{
	if (!IS_NOCMTIME(inode)) {
688
		inode->i_ctime = current_time(inode);
M
Maxim Patlasov 已提交
689 690 691 692
		mark_inode_dirty_sync(inode);
	}
}

693 694 695 696
static int fuse_unlink(struct inode *dir, struct dentry *entry)
{
	int err;
	struct fuse_conn *fc = get_fuse_conn(dir);
697 698
	FUSE_ARGS(args);

699 700 701 702 703
	args.opcode = FUSE_UNLINK;
	args.nodeid = get_node_id(dir);
	args.in_numargs = 1;
	args.in_args[0].size = entry->d_name.len + 1;
	args.in_args[0].value = entry->d_name.name;
704
	err = fuse_simple_request(fc, &args);
705
	if (!err) {
706
		struct inode *inode = d_inode(entry);
M
Miklos Szeredi 已提交
707
		struct fuse_inode *fi = get_fuse_inode(inode);
708

709
		spin_lock(&fi->lock);
710
		fi->attr_version = atomic64_inc_return(&fc->attr_version);
711 712 713 714 715 716 717 718
		/*
		 * If i_nlink == 0 then unlink doesn't make sense, yet this can
		 * happen if userspace filesystem is careless.  It would be
		 * difficult to enforce correct nlink usage so just ignore this
		 * condition here
		 */
		if (inode->i_nlink > 0)
			drop_nlink(inode);
719
		spin_unlock(&fi->lock);
720
		fuse_invalidate_attr(inode);
721
		fuse_dir_changed(dir);
722
		fuse_invalidate_entry_cache(entry);
M
Maxim Patlasov 已提交
723
		fuse_update_ctime(inode);
724 725 726 727 728 729 730 731 732
	} else if (err == -EINTR)
		fuse_invalidate_entry(entry);
	return err;
}

static int fuse_rmdir(struct inode *dir, struct dentry *entry)
{
	int err;
	struct fuse_conn *fc = get_fuse_conn(dir);
733 734
	FUSE_ARGS(args);

735 736 737 738 739
	args.opcode = FUSE_RMDIR;
	args.nodeid = get_node_id(dir);
	args.in_numargs = 1;
	args.in_args[0].size = entry->d_name.len + 1;
	args.in_args[0].value = entry->d_name.name;
740
	err = fuse_simple_request(fc, &args);
741
	if (!err) {
742
		clear_nlink(d_inode(entry));
743
		fuse_dir_changed(dir);
744
		fuse_invalidate_entry_cache(entry);
745 746 747 748 749
	} else if (err == -EINTR)
		fuse_invalidate_entry(entry);
	return err;
}

M
Miklos Szeredi 已提交
750 751 752
/*
 * Send a rename request and update cached state afterwards.
 *
 * @opcode:  request opcode to send (the visible caller passes
 *           FUSE_RENAME or FUSE_RENAME2)
 * @argsize: number of bytes of the fuse_rename2_in argument that are
 *           zeroed and sent; lets the same buffer serve both opcodes
 *
 * On success attributes and ctime of the affected inodes are refreshed
 * and both directories are marked changed.  On -EINTR the dentries are
 * invalidated.  Returns the result of the request.
 */
static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
			      struct inode *newdir, struct dentry *newent,
			      unsigned int flags, int opcode, size_t argsize)
{
	int err;
	struct fuse_rename2_in inarg;
	struct fuse_conn *fc = get_fuse_conn(olddir);
	FUSE_ARGS(args);

	memset(&inarg, 0, argsize);
	inarg.newdir = get_node_id(newdir);
	inarg.flags = flags;
	args.opcode = opcode;
	args.nodeid = get_node_id(olddir);
	args.in_numargs = 3;
	args.in_args[0].size = argsize;
	args.in_args[0].value = &inarg;
	args.in_args[1].size = oldent->d_name.len + 1;
	args.in_args[1].value = oldent->d_name.name;
	args.in_args[2].size = newent->d_name.len + 1;
	args.in_args[2].value = newent->d_name.name;
	err = fuse_simple_request(fc, &args);
	if (!err) {
		/* ctime changes */
		fuse_invalidate_attr(d_inode(oldent));
		fuse_update_ctime(d_inode(oldent));

		if (flags & RENAME_EXCHANGE) {
			fuse_invalidate_attr(d_inode(newent));
			fuse_update_ctime(d_inode(newent));
		}

		fuse_dir_changed(olddir);
		if (olddir != newdir)
			fuse_dir_changed(newdir);

		/* newent will end up negative */
		if (!(flags & RENAME_EXCHANGE) && d_really_is_positive(newent)) {
			fuse_invalidate_attr(d_inode(newent));
			fuse_invalidate_entry_cache(newent);
			fuse_update_ctime(d_inode(newent));
		}
	} else if (err == -EINTR) {
		/* If request was interrupted, DEITY only knows if the
		   rename actually took place.  If the invalidation
		   fails (e.g. some process has CWD under the renamed
		   directory), then there can be inconsistency between
		   the dcache and the real filesystem.  Tough luck. */
		fuse_invalidate_entry(oldent);
		if (d_really_is_positive(newent))
			fuse_invalidate_entry(newent);
	}

	return err;
}

M
Miklos Szeredi 已提交
806 807 808 809 810 811 812 813 814 815
static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
			struct inode *newdir, struct dentry *newent,
			unsigned int flags)
{
	struct fuse_conn *fc = get_fuse_conn(olddir);
	int err;

	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
		return -EINVAL;

M
Miklos Szeredi 已提交
816 817 818
	if (flags) {
		if (fc->no_rename2 || fc->minor < 23)
			return -EINVAL;
M
Miklos Szeredi 已提交
819

M
Miklos Szeredi 已提交
820 821 822 823 824 825 826 827 828 829 830
		err = fuse_rename_common(olddir, oldent, newdir, newent, flags,
					 FUSE_RENAME2,
					 sizeof(struct fuse_rename2_in));
		if (err == -ENOSYS) {
			fc->no_rename2 = 1;
			err = -EINVAL;
		}
	} else {
		err = fuse_rename_common(olddir, oldent, newdir, newent, 0,
					 FUSE_RENAME,
					 sizeof(struct fuse_rename_in));
M
Miklos Szeredi 已提交
831
	}
M
Miklos Szeredi 已提交
832

M
Miklos Szeredi 已提交
833
	return err;
M
Miklos Szeredi 已提交
834
}
M
Miklos Szeredi 已提交
835

836 837 838 839 840
static int fuse_link(struct dentry *entry, struct inode *newdir,
		     struct dentry *newent)
{
	int err;
	struct fuse_link_in inarg;
841
	struct inode *inode = d_inode(entry);
842
	struct fuse_conn *fc = get_fuse_conn(inode);
843
	FUSE_ARGS(args);
844 845 846

	memset(&inarg, 0, sizeof(inarg));
	inarg.oldnodeid = get_node_id(inode);
847 848 849 850 851 852
	args.opcode = FUSE_LINK;
	args.in_numargs = 2;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.in_args[1].size = newent->d_name.len + 1;
	args.in_args[1].value = newent->d_name.name;
853
	err = create_new_entry(fc, &args, newdir, newent, inode->i_mode);
854 855 856 857 858 859
	/* Contrary to "normal" filesystems it can happen that link
	   makes two "logical" inodes point to the same "physical"
	   inode.  We invalidate the attributes of the old one, so it
	   will reflect changes in the backing inode (link count,
	   etc.)
	*/
M
Miklos Szeredi 已提交
860 861 862
	if (!err) {
		struct fuse_inode *fi = get_fuse_inode(inode);

863
		spin_lock(&fi->lock);
864
		fi->attr_version = atomic64_inc_return(&fc->attr_version);
M
Miklos Szeredi 已提交
865
		inc_nlink(inode);
866
		spin_unlock(&fi->lock);
867
		fuse_invalidate_attr(inode);
M
Maxim Patlasov 已提交
868
		fuse_update_ctime(inode);
M
Miklos Szeredi 已提交
869 870 871
	} else if (err == -EINTR) {
		fuse_invalidate_attr(inode);
	}
872 873 874
	return err;
}

875 876 877
static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr,
			  struct kstat *stat)
{
M
Miklos Szeredi 已提交
878
	unsigned int blkbits;
P
Pavel Emelyanov 已提交
879 880 881
	struct fuse_conn *fc = get_fuse_conn(inode);

	/* see the comment in fuse_change_attributes() */
M
Maxim Patlasov 已提交
882
	if (fc->writeback_cache && S_ISREG(inode->i_mode)) {
P
Pavel Emelyanov 已提交
883
		attr->size = i_size_read(inode);
M
Maxim Patlasov 已提交
884 885
		attr->mtime = inode->i_mtime.tv_sec;
		attr->mtimensec = inode->i_mtime.tv_nsec;
M
Maxim Patlasov 已提交
886 887
		attr->ctime = inode->i_ctime.tv_sec;
		attr->ctimensec = inode->i_ctime.tv_nsec;
M
Maxim Patlasov 已提交
888
	}
M
Miklos Szeredi 已提交
889

890 891 892 893
	stat->dev = inode->i_sb->s_dev;
	stat->ino = attr->ino;
	stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777);
	stat->nlink = attr->nlink;
894 895
	stat->uid = make_kuid(fc->user_ns, attr->uid);
	stat->gid = make_kgid(fc->user_ns, attr->gid);
896 897 898 899 900 901 902 903 904
	stat->rdev = inode->i_rdev;
	stat->atime.tv_sec = attr->atime;
	stat->atime.tv_nsec = attr->atimensec;
	stat->mtime.tv_sec = attr->mtime;
	stat->mtime.tv_nsec = attr->mtimensec;
	stat->ctime.tv_sec = attr->ctime;
	stat->ctime.tv_nsec = attr->ctimensec;
	stat->size = attr->size;
	stat->blocks = attr->blocks;
M
Miklos Szeredi 已提交
905 906 907 908 909 910 911

	if (attr->blksize != 0)
		blkbits = ilog2(attr->blksize);
	else
		blkbits = inode->i_sb->s_blocksize_bits;

	stat->blksize = 1 << blkbits;
912 913
}

914 915
static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
			   struct file *file)
916 917
{
	int err;
918 919
	struct fuse_getattr_in inarg;
	struct fuse_attr_out outarg;
920
	struct fuse_conn *fc = get_fuse_conn(inode);
921
	FUSE_ARGS(args);
922 923
	u64 attr_version;

924
	attr_version = fuse_get_attr_version(fc);
925

926
	memset(&inarg, 0, sizeof(inarg));
927
	memset(&outarg, 0, sizeof(outarg));
928 929 930 931 932 933 934
	/* Directories have separate file-handle space */
	if (file && S_ISREG(inode->i_mode)) {
		struct fuse_file *ff = file->private_data;

		inarg.getattr_flags |= FUSE_GETATTR_FH;
		inarg.fh = ff->fh;
	}
935 936 937 938 939 940 941 942
	args.opcode = FUSE_GETATTR;
	args.nodeid = get_node_id(inode);
	args.in_numargs = 1;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
	args.out_numargs = 1;
	args.out_args[0].size = sizeof(outarg);
	args.out_args[0].value = &outarg;
943
	err = fuse_simple_request(fc, &args);
944
	if (!err) {
945
		if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
946 947 948
			make_bad_inode(inode);
			err = -EIO;
		} else {
949 950
			fuse_change_attributes(inode, &outarg.attr,
					       attr_timeout(&outarg),
951 952
					       attr_version);
			if (stat)
953
				fuse_fillattr(inode, &outarg.attr, stat);
954 955 956 957 958
		}
	}
	return err;
}

M
Miklos Szeredi 已提交
959
/*
 * Refresh attributes if needed and optionally fill @stat.
 *
 * Whether a request is sent to userspace is decided in this order:
 *   1. AT_STATX_FORCE_SYNC in @flags: always;
 *   2. AT_STATX_DONT_SYNC in @flags: never;
 *   3. any bit of @request_mask set in the inode's inval_mask: yes;
 *   4. otherwise only if the cached attribute timeout (fi->i_time) has
 *      passed.
 *
 * Without a request, @stat (if non-NULL) is filled from the cached
 * inode, with mode and ino taken from fi->orig_i_mode and fi->orig_ino.
 *
 * Returns 0, or the error from fuse_do_getattr().
 */
static int fuse_update_get_attr(struct inode *inode, struct file *file,
				struct kstat *stat, u32 request_mask,
				unsigned int flags)
{
	struct fuse_inode *fi = get_fuse_inode(inode);
	int err = 0;
	bool sync;

	if (flags & AT_STATX_FORCE_SYNC)
		sync = true;
	else if (flags & AT_STATX_DONT_SYNC)
		sync = false;
	else if (request_mask & READ_ONCE(fi->inval_mask))
		sync = true;
	else
		sync = time_before64(fi->i_time, get_jiffies_64());

	if (sync) {
		forget_all_cached_acls(inode);
		err = fuse_do_getattr(inode, stat, file);
	} else if (stat) {
		generic_fillattr(inode, stat);
		stat->mode = fi->orig_i_mode;
		stat->ino = fi->orig_ino;
	}

	return err;
}

M
Miklos Szeredi 已提交
988 989
int fuse_update_attributes(struct inode *inode, struct file *file)
{
990 991 992
	/* Do *not* need to get atime for internal purposes */
	return fuse_update_get_attr(inode, file, NULL,
				    STATX_BASIC_STATS & ~STATX_ATIME, 0);
M
Miklos Szeredi 已提交
993 994
}

J
John Muir 已提交
995
/*
 * Invalidate the dentry called @name below the directory identified by
 * @parent_nodeid.
 *
 * If @child_nodeid is non-zero and the dentry is positive, the entry is
 * additionally treated as deleted: its inode must have node id
 * @child_nodeid, it must not be a mountpoint and, if it is a directory,
 * it must be empty.  On success the link count is cleared and the dentry
 * is passed to d_delete().
 *
 * Returns 0 on success, -ENOENT if the parent inode, an alias of it, or
 * the named dentry cannot be found (or the child node id does not match),
 * -ENOTDIR if the parent is not a directory, -EBUSY for a mountpoint and
 * -ENOTEMPTY for a non-empty directory.
 */
int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid,
			     u64 child_nodeid, struct qstr *name)
{
	struct inode *parent;
	struct dentry *alias;
	struct dentry *victim;
	int ret;

	parent = ilookup5(sb, parent_nodeid, fuse_inode_eq, &parent_nodeid);
	if (!parent)
		return -ENOENT;

	inode_lock(parent);

	ret = -ENOTDIR;
	if (!S_ISDIR(parent->i_mode))
		goto out_unlock_parent;

	ret = -ENOENT;
	alias = d_find_alias(parent);
	if (!alias)
		goto out_unlock_parent;

	name->hash = full_name_hash(alias, name->name, name->len);
	victim = d_lookup(alias, name);
	dput(alias);
	if (!victim)
		goto out_unlock_parent;

	fuse_dir_changed(parent);
	fuse_invalidate_entry(victim);

	if (child_nodeid != 0 && d_really_is_positive(victim)) {
		inode_lock(d_inode(victim));

		ret = -ENOENT;
		if (get_node_id(d_inode(victim)) != child_nodeid)
			goto out_unlock_child;

		ret = -EBUSY;
		if (d_mountpoint(victim))
			goto out_unlock_child;

		if (d_is_dir(victim)) {
			shrink_dcache_parent(victim);

			ret = -ENOTEMPTY;
			if (!simple_empty(victim))
				goto out_unlock_child;

			d_inode(victim)->i_flags |= S_DEAD;
		}
		dont_mount(victim);
		clear_nlink(d_inode(victim));
		ret = 0;
 out_unlock_child:
		inode_unlock(d_inode(victim));
		/* Only drop the dentry once every check above has passed */
		if (!ret)
			d_delete(victim);
	} else {
		ret = 0;
	}
	dput(victim);

 out_unlock_parent:
	inode_unlock(parent);
	iput(parent);
	return ret;
}

1061 1062
/*
 * Calling into a user-controlled filesystem gives the filesystem
1063
 * daemon ptrace-like capabilities over the current process.  This
1064 1065 1066 1067 1068 1069 1070 1071 1072 1073
 * means, that the filesystem daemon is able to record the exact
 * filesystem operations performed, and can also control the behavior
 * of the requester process in otherwise impossible ways.  For example
 * it can delay the operation for arbitrary length of time allowing
 * DoS against the requester.
 *
 * For this reason only those processes can call into the filesystem,
 * for which the owner of the mount has ptrace privilege.  This
 * excludes processes started by other users, suid or sgid processes.
 */
1074
int fuse_allow_current_process(struct fuse_conn *fc)
1075
{
1076
	const struct cred *cred;
1077

M
Miklos Szeredi 已提交
1078
	if (fc->allow_other)
1079
		return current_in_userns(fc->user_ns);
1080

1081
	cred = current_cred();
1082 1083 1084 1085 1086 1087
	if (uid_eq(cred->euid, fc->user_id) &&
	    uid_eq(cred->suid, fc->user_id) &&
	    uid_eq(cred->uid,  fc->user_id) &&
	    gid_eq(cred->egid, fc->group_id) &&
	    gid_eq(cred->sgid, fc->group_id) &&
	    gid_eq(cred->gid,  fc->group_id))
1088
		return 1;
1089

1090
	return 0;
1091 1092
}

M
Miklos Szeredi 已提交
1093 1094 1095
static int fuse_access(struct inode *inode, int mask)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
1096
	FUSE_ARGS(args);
M
Miklos Szeredi 已提交
1097 1098 1099
	struct fuse_access_in inarg;
	int err;

1100 1101
	BUG_ON(mask & MAY_NOT_BLOCK);

M
Miklos Szeredi 已提交
1102 1103 1104 1105
	if (fc->no_access)
		return 0;

	memset(&inarg, 0, sizeof(inarg));
1106
	inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
1107 1108 1109 1110 1111
	args.opcode = FUSE_ACCESS;
	args.nodeid = get_node_id(inode);
	args.in_numargs = 1;
	args.in_args[0].size = sizeof(inarg);
	args.in_args[0].value = &inarg;
1112
	err = fuse_simple_request(fc, &args);
M
Miklos Szeredi 已提交
1113 1114 1115 1116 1117 1118 1119
	if (err == -ENOSYS) {
		fc->no_access = 1;
		err = 0;
	}
	return err;
}

1120
static int fuse_perm_getattr(struct inode *inode, int mask)
1121
{
1122
	if (mask & MAY_NOT_BLOCK)
1123 1124
		return -ECHILD;

S
Seth Forshee 已提交
1125
	forget_all_cached_acls(inode);
1126 1127 1128
	return fuse_do_getattr(inode, NULL, NULL);
}

1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141
/*
 * Check permission.  The two basic access models of FUSE are:
 *
 * 1) Local access checking ('default_permissions' mount option) based
 * on file mode.  This is the plain old disk filesystem permission
 * modell.
 *
 * 2) "Remote" access checking, where server is responsible for
 * checking permission in each inode operation.  An exception to this
 * is if ->permission() was invoked from sys_access() in which case an
 * access request is sent.  Execute permission is still checked
 * locally based on file mode.
 */
1142
static int fuse_permission(struct inode *inode, int mask)
1143 1144
{
	struct fuse_conn *fc = get_fuse_conn(inode);
1145 1146
	bool refreshed = false;
	int err = 0;
1147

1148
	if (!fuse_allow_current_process(fc))
1149
		return -EACCES;
1150 1151

	/*
1152
	 * If attributes are needed, refresh them before proceeding
1153
	 */
M
Miklos Szeredi 已提交
1154
	if (fc->default_permissions ||
1155
	    ((mask & MAY_EXEC) && S_ISREG(inode->i_mode))) {
1156
		struct fuse_inode *fi = get_fuse_inode(inode);
1157
		u32 perm_mask = STATX_MODE | STATX_UID | STATX_GID;
1158

1159 1160
		if (perm_mask & READ_ONCE(fi->inval_mask) ||
		    time_before64(fi->i_time, get_jiffies_64())) {
1161 1162
			refreshed = true;

1163
			err = fuse_perm_getattr(inode, mask);
1164 1165 1166
			if (err)
				return err;
		}
1167 1168
	}

M
Miklos Szeredi 已提交
1169
	if (fc->default_permissions) {
1170
		err = generic_permission(inode, mask);
M
Miklos Szeredi 已提交
1171 1172 1173 1174

		/* If permission is denied, try to refresh file
		   attributes.  This is also needed, because the root
		   node will at first have no permissions */
1175
		if (err == -EACCES && !refreshed) {
1176
			err = fuse_perm_getattr(inode, mask);
M
Miklos Szeredi 已提交
1177
			if (!err)
1178
				err = generic_permission(inode, mask);
M
Miklos Szeredi 已提交
1179 1180
		}

1181 1182 1183 1184
		/* Note: the opposite of the above test does not
		   exist.  So if permissions are revoked this won't be
		   noticed immediately, only after the attribute
		   timeout has expired */
E
Eric Paris 已提交
1185
	} else if (mask & (MAY_ACCESS | MAY_CHDIR)) {
1186 1187 1188 1189 1190 1191
		err = fuse_access(inode, mask);
	} else if ((mask & MAY_EXEC) && S_ISREG(inode->i_mode)) {
		if (!(inode->i_mode & S_IXUGO)) {
			if (refreshed)
				return -EACCES;

1192
			err = fuse_perm_getattr(inode, mask);
1193 1194 1195
			if (!err && !(inode->i_mode & S_IXUGO))
				return -EACCES;
		}
1196
	}
1197
	return err;
1198 1199
}

D
Dan Schatzberg 已提交
1200
static int fuse_readlink_page(struct inode *inode, struct page *page)
1201 1202
{
	struct fuse_conn *fc = get_fuse_conn(inode);
1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219
	struct fuse_page_desc desc = { .length = PAGE_SIZE - 1 };
	struct fuse_args_pages ap = {
		.num_pages = 1,
		.pages = &page,
		.descs = &desc,
	};
	char *link;
	ssize_t res;

	ap.args.opcode = FUSE_READLINK;
	ap.args.nodeid = get_node_id(inode);
	ap.args.out_pages = true;
	ap.args.out_argvar = true;
	ap.args.page_zeroing = true;
	ap.args.out_numargs = 1;
	ap.args.out_args[0].size = desc.length;
	res = fuse_simple_request(fc, &ap.args);
1220

1221
	fuse_invalidate_atime(inode);
1222

1223 1224
	if (res < 0)
		return res;
1225

1226 1227
	if (WARN_ON(res >= PAGE_SIZE))
		return -EIO;
D
Dan Schatzberg 已提交
1228

1229 1230
	link = page_address(page);
	link[res] = '\0';
D
Dan Schatzberg 已提交
1231

1232
	return 0;
D
Dan Schatzberg 已提交
1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269
}

/*
 * ->get_link() for fuse symlinks.
 *
 * With fc->cache_symlinks set the work is delegated to page_get_link().
 * Otherwise a fresh page is allocated, filled by fuse_readlink_page()
 * and handed to @callback (page_put_link) for release.
 *
 * Returns the link text or an ERR_PTR: -EIO for a bad inode, -ECHILD
 * when @dentry is NULL, -ENOMEM when no page could be allocated, or the
 * error from fuse_readlink_page().
 */
static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
				 struct delayed_call *callback)
{
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct page *page;
	int err;

	if (is_bad_inode(inode))
		return ERR_PTR(-EIO);

	if (fc->cache_symlinks)
		return page_get_link(dentry, inode, callback);

	if (!dentry)
		return ERR_PTR(-ECHILD);

	page = alloc_page(GFP_KERNEL);
	if (!page)
		return ERR_PTR(-ENOMEM);

	err = fuse_readlink_page(inode, page);
	if (err) {
		__free_page(page);
		return ERR_PTR(err);
	}

	set_delayed_call(callback, page_put_link, page);

	return page_address(page);
}

static int fuse_dir_open(struct inode *inode, struct file *file)
{
1274
	return fuse_open_common(inode, file, true);
1275 1276 1277 1278
}

static int fuse_dir_release(struct inode *inode, struct file *file)
{
1279
	fuse_release_common(file, true);
1280 1281

	return 0;
1282 1283
}

1284 1285
/*
 * ->fsync() for directories.
 *
 * Issues FUSE_FSYNCDIR through fuse_fsync_common() with the inode lock
 * held.  An -ENOSYS result is remembered in fc->no_fsyncdir, after which
 * this function returns 0 without sending a request.
 *
 * Returns -EIO for a bad inode, otherwise 0 or the request error.
 */
static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
			  int datasync)
{
	struct inode *dir = file->f_mapping->host;
	struct fuse_conn *fc = get_fuse_conn(dir);
	int ret;

	if (is_bad_inode(dir))
		return -EIO;

	if (fc->no_fsyncdir)
		return 0;

	inode_lock(dir);
	ret = fuse_fsync_common(file, start, end, datasync, FUSE_FSYNCDIR);
	if (ret == -ENOSYS) {
		fc->no_fsyncdir = 1;
		ret = 0;
	}
	inode_unlock(dir);

	return ret;
}

1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331
/*
 * ->unlocked_ioctl() for directories.  Returns -ENOTTY when the
 * connection's minor protocol version is below 18.
 */
static long fuse_dir_ioctl(struct file *file, unsigned int cmd,
			    unsigned long arg)
{
	struct inode *dir = file->f_mapping->host;
	struct fuse_conn *fc = get_fuse_conn(dir);

	/* FUSE_IOCTL_DIR only supported for API version >= 7.18 */
	if (fc->minor >= 18)
		return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_DIR);

	return -ENOTTY;
}

/*
 * ->compat_ioctl() for directories.  Same as fuse_dir_ioctl() but also
 * passes FUSE_IOCTL_COMPAT.  Returns -ENOTTY when the connection's minor
 * protocol version is below 18.
 */
static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd,
				   unsigned long arg)
{
	struct inode *dir = file->f_mapping->host;
	struct fuse_conn *fc = get_fuse_conn(dir);
	const unsigned int ioctl_flags = FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR;

	if (fc->minor >= 18)
		return fuse_ioctl_common(file, cmd, arg, ioctl_flags);

	return -ENOTTY;
}

M
Maxim Patlasov 已提交
1332
/*
 * Decide whether a setattr with valid-mask @ivalid should carry mtime.
 *
 * mtime is skipped only for a size change coming from open(O_TRUNC) or
 * ftruncate() (ATTR_SIZE together with ATTR_OPEN or ATTR_FILE), and even
 * then it is still sent when it was set explicitly (ATTR_MTIME_SET) or
 * when the kernel's i_mtime is the official one (@trust_local_mtime).
 */
static bool update_mtime(unsigned ivalid, bool trust_local_mtime)
{
	const bool explicit_mtime = ivalid & ATTR_MTIME_SET;
	const bool trunc_via_fd = (ivalid & ATTR_SIZE) &&
				  (ivalid & (ATTR_OPEN | ATTR_FILE));

	return explicit_mtime || trust_local_mtime || !trunc_via_fd;
}

1350 1351
/*
 * Translate the VFS attribute change @iattr into the FUSE setattr
 * request @arg.  For every ATTR_* bit handled here the matching FATTR_*
 * bit is OR-ed into arg->valid and the value copied over; uid and gid
 * are converted through fc->user_ns.
 *
 * @trust_local_cmtime: the kernel's mtime/ctime are authoritative, so
 * ctime is sent as well and mtime is never marked FATTR_MTIME_NOW.
 */
static void iattr_to_fattr(struct fuse_conn *fc, struct iattr *iattr,
			   struct fuse_setattr_in *arg, bool trust_local_cmtime)
{
	const unsigned int ivalid = iattr->ia_valid;

	if (ivalid & ATTR_MODE) {
		arg->valid |= FATTR_MODE;
		arg->mode = iattr->ia_mode;
	}
	if (ivalid & ATTR_UID) {
		arg->valid |= FATTR_UID;
		arg->uid = from_kuid(fc->user_ns, iattr->ia_uid);
	}
	if (ivalid & ATTR_GID) {
		arg->valid |= FATTR_GID;
		arg->gid = from_kgid(fc->user_ns, iattr->ia_gid);
	}
	if (ivalid & ATTR_SIZE) {
		arg->valid |= FATTR_SIZE;
		arg->size = iattr->ia_size;
	}
	if (ivalid & ATTR_ATIME) {
		arg->valid |= FATTR_ATIME;
		arg->atime = iattr->ia_atime.tv_sec;
		arg->atimensec = iattr->ia_atime.tv_nsec;
		/* No explicit timestamp was supplied */
		if (!(ivalid & ATTR_ATIME_SET))
			arg->valid |= FATTR_ATIME_NOW;
	}
	if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_cmtime)) {
		arg->valid |= FATTR_MTIME;
		arg->mtime = iattr->ia_mtime.tv_sec;
		arg->mtimensec = iattr->ia_mtime.tv_nsec;
		if (!((ivalid & ATTR_MTIME_SET) || trust_local_cmtime))
			arg->valid |= FATTR_MTIME_NOW;
	}
	if (trust_local_cmtime && (ivalid & ATTR_CTIME)) {
		arg->valid |= FATTR_CTIME;
		arg->ctime = iattr->ia_ctime.tv_sec;
		arg->ctimensec = iattr->ia_ctime.tv_nsec;
	}
}

M
Miklos Szeredi 已提交
1384 1385 1386 1387 1388 1389 1390 1391 1392 1393
/*
 * Prevent concurrent writepages on inode
 *
 * This is done by adding a negative bias (FUSE_NOWRITE) to the inode
 * write counter and waiting for all pending writes to finish, i.e. until
 * the counter equals the bias alone.
 *
 * The caller must hold the inode lock.
 */
void fuse_set_nowrite(struct inode *inode)
{
	struct fuse_inode *finode = get_fuse_inode(inode);

	BUG_ON(!inode_is_locked(inode));

	spin_lock(&finode->lock);
	/* A negative counter means the bias is already applied */
	BUG_ON(finode->writectr < 0);
	finode->writectr += FUSE_NOWRITE;
	spin_unlock(&finode->lock);

	wait_event(finode->page_waitq, finode->writectr == FUSE_NOWRITE);
}

/*
 * Allow writepages on inode
 *
 * Remove the bias from the write counter and send any queued
 * writepages.  The counter must hold exactly the bias on entry.
 */
static void __fuse_release_nowrite(struct inode *inode)
{
	struct fuse_inode *finode = get_fuse_inode(inode);

	BUG_ON(finode->writectr != FUSE_NOWRITE);

	finode->writectr = 0;
	fuse_flush_writepages(inode);
}

void fuse_release_nowrite(struct inode *inode)
{
1420
	struct fuse_inode *fi = get_fuse_inode(inode);
M
Miklos Szeredi 已提交
1421

1422
	spin_lock(&fi->lock);
M
Miklos Szeredi 已提交
1423
	__fuse_release_nowrite(inode);
1424
	spin_unlock(&fi->lock);
M
Miklos Szeredi 已提交
1425 1426
}

1427
static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
M
Maxim Patlasov 已提交
1428 1429 1430 1431
			      struct inode *inode,
			      struct fuse_setattr_in *inarg_p,
			      struct fuse_attr_out *outarg_p)
{
1432 1433 1434 1435 1436 1437 1438 1439
	args->opcode = FUSE_SETATTR;
	args->nodeid = get_node_id(inode);
	args->in_numargs = 1;
	args->in_args[0].size = sizeof(*inarg_p);
	args->in_args[0].value = inarg_p;
	args->out_numargs = 1;
	args->out_args[0].size = sizeof(*outarg_p);
	args->out_args[0].value = outarg_p;
M
Maxim Patlasov 已提交
1440 1441 1442 1443 1444
}

/*
 * Flush inode->i_mtime to the server
 */
1445
int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
M
Maxim Patlasov 已提交
1446 1447
{
	struct fuse_conn *fc = get_fuse_conn(inode);
1448
	FUSE_ARGS(args);
M
Maxim Patlasov 已提交
1449 1450 1451 1452 1453 1454
	struct fuse_setattr_in inarg;
	struct fuse_attr_out outarg;

	memset(&inarg, 0, sizeof(inarg));
	memset(&outarg, 0, sizeof(outarg));

1455
	inarg.valid = FATTR_MTIME;
M
Maxim Patlasov 已提交
1456 1457
	inarg.mtime = inode->i_mtime.tv_sec;
	inarg.mtimensec = inode->i_mtime.tv_nsec;
1458 1459 1460 1461 1462
	if (fc->minor >= 23) {
		inarg.valid |= FATTR_CTIME;
		inarg.ctime = inode->i_ctime.tv_sec;
		inarg.ctimensec = inode->i_ctime.tv_nsec;
	}
M
Miklos Szeredi 已提交
1463 1464 1465 1466
	if (ff) {
		inarg.valid |= FATTR_FH;
		inarg.fh = ff->fh;
	}
1467
	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
M
Maxim Patlasov 已提交
1468

1469
	return fuse_simple_request(fc, &args);
M
Maxim Patlasov 已提交
1470 1471
}

1472 1473 1474 1475 1476
/*
 * Set attributes, and at the same time refresh them.
 *
 * Truncation is slightly complicated, because the 'truncate' request
 * may fail, in which case we don't want to touch the mapping.
M
Miklos Szeredi 已提交
1477 1478
 * vmtruncate() doesn't allow for this case, so do the rlimit checking
 * and the actual truncation by hand.
1479
 */
1480
int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
1481
		    struct file *file)
1482
{
1483
	struct inode *inode = d_inode(dentry);
1484
	struct fuse_conn *fc = get_fuse_conn(inode);
1485
	struct fuse_inode *fi = get_fuse_inode(inode);
1486
	FUSE_ARGS(args);
1487 1488
	struct fuse_setattr_in inarg;
	struct fuse_attr_out outarg;
M
Miklos Szeredi 已提交
1489
	bool is_truncate = false;
P
Pavel Emelyanov 已提交
1490
	bool is_wb = fc->writeback_cache;
M
Miklos Szeredi 已提交
1491
	loff_t oldsize;
1492
	int err;
1493
	bool trust_local_cmtime = is_wb && S_ISREG(inode->i_mode);
1494

M
Miklos Szeredi 已提交
1495
	if (!fc->default_permissions)
1496 1497
		attr->ia_valid |= ATTR_FORCE;

1498
	err = setattr_prepare(dentry, attr);
1499 1500
	if (err)
		return err;
M
Miklos Szeredi 已提交
1501

M
Miklos Szeredi 已提交
1502
	if (attr->ia_valid & ATTR_OPEN) {
1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513
		/* This is coming from open(..., ... | O_TRUNC); */
		WARN_ON(!(attr->ia_valid & ATTR_SIZE));
		WARN_ON(attr->ia_size != 0);
		if (fc->atomic_o_trunc) {
			/*
			 * No need to send request to userspace, since actual
			 * truncation has already been done by OPEN.  But still
			 * need to truncate page cache.
			 */
			i_size_write(inode, 0);
			truncate_pagecache(inode, 0);
M
Miklos Szeredi 已提交
1514
			return 0;
1515
		}
M
Miklos Szeredi 已提交
1516 1517
		file = NULL;
	}
1518

1519 1520 1521
	if (attr->ia_valid & ATTR_SIZE) {
		if (WARN_ON(!S_ISREG(inode->i_mode)))
			return -EIO;
M
Miklos Szeredi 已提交
1522
		is_truncate = true;
1523
	}
1524

1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537
	/* Flush dirty data/metadata before non-truncate SETATTR */
	if (is_wb && S_ISREG(inode->i_mode) &&
	    attr->ia_valid &
			(ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_MTIME_SET |
			 ATTR_TIMES_SET)) {
		err = write_inode_now(inode, true);
		if (err)
			return err;

		fuse_set_nowrite(inode);
		fuse_release_nowrite(inode);
	}

1538
	if (is_truncate) {
M
Miklos Szeredi 已提交
1539
		fuse_set_nowrite(inode);
1540
		set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1541 1542
		if (trust_local_cmtime && attr->ia_size != inode->i_size)
			attr->ia_valid |= ATTR_MTIME | ATTR_CTIME;
1543
	}
M
Miklos Szeredi 已提交
1544

1545
	memset(&inarg, 0, sizeof(inarg));
1546
	memset(&outarg, 0, sizeof(outarg));
1547
	iattr_to_fattr(fc, attr, &inarg, trust_local_cmtime);
1548 1549 1550 1551 1552
	if (file) {
		struct fuse_file *ff = file->private_data;
		inarg.valid |= FATTR_FH;
		inarg.fh = ff->fh;
	}
1553 1554 1555 1556 1557
	if (attr->ia_valid & ATTR_SIZE) {
		/* For mandatory locking in truncate */
		inarg.valid |= FATTR_LOCKOWNER;
		inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
	}
1558 1559
	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
	err = fuse_simple_request(fc, &args);
1560 1561 1562
	if (err) {
		if (err == -EINTR)
			fuse_invalidate_attr(inode);
M
Miklos Szeredi 已提交
1563
		goto error;
1564
	}
1565

1566 1567
	if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
		make_bad_inode(inode);
M
Miklos Szeredi 已提交
1568 1569 1570 1571
		err = -EIO;
		goto error;
	}

1572
	spin_lock(&fi->lock);
M
Maxim Patlasov 已提交
1573
	/* the kernel maintains i_mtime locally */
1574 1575 1576 1577 1578
	if (trust_local_cmtime) {
		if (attr->ia_valid & ATTR_MTIME)
			inode->i_mtime = attr->ia_mtime;
		if (attr->ia_valid & ATTR_CTIME)
			inode->i_ctime = attr->ia_ctime;
M
Miklos Szeredi 已提交
1579
		/* FIXME: clear I_DIRTY_SYNC? */
M
Maxim Patlasov 已提交
1580 1581
	}

M
Miklos Szeredi 已提交
1582 1583 1584
	fuse_change_attributes_common(inode, &outarg.attr,
				      attr_timeout(&outarg));
	oldsize = inode->i_size;
P
Pavel Emelyanov 已提交
1585 1586 1587
	/* see the comment in fuse_change_attributes() */
	if (!is_wb || is_truncate || !S_ISREG(inode->i_mode))
		i_size_write(inode, outarg.attr.size);
M
Miklos Szeredi 已提交
1588 1589

	if (is_truncate) {
1590
		/* NOTE: this may release/reacquire fi->lock */
M
Miklos Szeredi 已提交
1591 1592
		__fuse_release_nowrite(inode);
	}
1593
	spin_unlock(&fi->lock);
M
Miklos Szeredi 已提交
1594 1595 1596 1597 1598

	/*
	 * Only call invalidate_inode_pages2() after removing
	 * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
	 */
P
Pavel Emelyanov 已提交
1599 1600
	if ((is_truncate || !is_wb) &&
	    S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
1601
		truncate_pagecache(inode, outarg.attr.size);
M
Miklos Szeredi 已提交
1602
		invalidate_inode_pages2(inode->i_mapping);
1603 1604
	}

1605
	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
1606
	return 0;
M
Miklos Szeredi 已提交
1607 1608 1609 1610 1611

error:
	if (is_truncate)
		fuse_release_nowrite(inode);

1612
	clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
M
Miklos Szeredi 已提交
1613
	return err;
1614 1615
}

1616 1617
static int fuse_setattr(struct dentry *entry, struct iattr *attr)
{
1618
	struct inode *inode = d_inode(entry);
1619
	struct fuse_conn *fc = get_fuse_conn(inode);
1620
	struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL;
1621
	int ret;
1622 1623 1624 1625

	if (!fuse_allow_current_process(get_fuse_conn(inode)))
		return -EACCES;

1626 1627 1628
	if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) {
		attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID |
				    ATTR_MODE);
1629

1630
		/*
1631 1632 1633 1634
		 * The only sane way to reliably kill suid/sgid is to do it in
		 * the userspace filesystem
		 *
		 * This should be done on write(), truncate() and chown().
1635
		 */
1636 1637 1638 1639 1640 1641 1642 1643 1644 1645
		if (!fc->handle_killpriv) {
			/*
			 * ia_mode calculation may have used stale i_mode.
			 * Refresh and recalculate.
			 */
			ret = fuse_do_getattr(inode, NULL, file);
			if (ret)
				return ret;

			attr->ia_mode = inode->i_mode;
1646
			if (inode->i_mode & S_ISUID) {
1647 1648 1649
				attr->ia_valid |= ATTR_MODE;
				attr->ia_mode &= ~S_ISUID;
			}
1650
			if ((inode->i_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
1651 1652 1653
				attr->ia_valid |= ATTR_MODE;
				attr->ia_mode &= ~S_ISGID;
			}
1654 1655 1656 1657
		}
	}
	if (!attr->ia_valid)
		return 0;
1658

1659
	ret = fuse_do_setattr(entry, attr, file);
1660
	if (!ret) {
S
Seth Forshee 已提交
1661 1662 1663 1664 1665 1666 1667
		/*
		 * If filesystem supports acls it may have updated acl xattrs in
		 * the filesystem, so forget cached acls for the inode.
		 */
		if (fc->posix_acl)
			forget_all_cached_acls(inode);

1668 1669 1670 1671 1672
		/* Directory mode changed, may need to revalidate access */
		if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE))
			fuse_invalidate_entry_cache(entry);
	}
	return ret;
1673 1674
}

1675 1676
/*
 * ->getattr() entry point.  Returns -EACCES for callers that
 * fuse_allow_current_process() does not admit; otherwise the result of
 * fuse_update_get_attr() called without a file.
 */
static int fuse_getattr(const struct path *path, struct kstat *stat,
			u32 request_mask, unsigned int flags)
{
	struct inode *inode = d_inode(path->dentry);

	if (!fuse_allow_current_process(get_fuse_conn(inode)))
		return -EACCES;

	return fuse_update_get_attr(inode, NULL, stat, request_mask, flags);
}

1687
static const struct inode_operations fuse_dir_inode_operations = {
1688
	.lookup		= fuse_lookup,
1689 1690 1691 1692
	.mkdir		= fuse_mkdir,
	.symlink	= fuse_symlink,
	.unlink		= fuse_unlink,
	.rmdir		= fuse_rmdir,
1693
	.rename		= fuse_rename2,
1694 1695 1696
	.link		= fuse_link,
	.setattr	= fuse_setattr,
	.create		= fuse_create,
1697
	.atomic_open	= fuse_atomic_open,
1698
	.mknod		= fuse_mknod,
1699 1700
	.permission	= fuse_permission,
	.getattr	= fuse_getattr,
1701
	.listxattr	= fuse_listxattr,
S
Seth Forshee 已提交
1702 1703
	.get_acl	= fuse_get_acl,
	.set_acl	= fuse_set_acl,
1704 1705
};

1706
static const struct file_operations fuse_dir_operations = {
M
Miklos Szeredi 已提交
1707
	.llseek		= generic_file_llseek,
1708
	.read		= generic_read_dir,
A
Al Viro 已提交
1709
	.iterate_shared	= fuse_readdir,
1710 1711
	.open		= fuse_dir_open,
	.release	= fuse_dir_release,
1712
	.fsync		= fuse_dir_fsync,
1713 1714
	.unlocked_ioctl	= fuse_dir_ioctl,
	.compat_ioctl	= fuse_dir_compat_ioctl,
1715 1716
};

1717
static const struct inode_operations fuse_common_inode_operations = {
1718
	.setattr	= fuse_setattr,
1719 1720
	.permission	= fuse_permission,
	.getattr	= fuse_getattr,
1721
	.listxattr	= fuse_listxattr,
S
Seth Forshee 已提交
1722 1723
	.get_acl	= fuse_get_acl,
	.set_acl	= fuse_set_acl,
1724 1725
};

1726
static const struct inode_operations fuse_symlink_inode_operations = {
1727
	.setattr	= fuse_setattr,
1728
	.get_link	= fuse_get_link,
1729
	.getattr	= fuse_getattr,
1730
	.listxattr	= fuse_listxattr,
1731 1732 1733 1734 1735 1736 1737 1738 1739
};

/* Install the common inode operations table on @inode. */
void fuse_init_common(struct inode *inode)
{
	const struct inode_operations *ops = &fuse_common_inode_operations;

	inode->i_op = ops;
}

void fuse_init_dir(struct inode *inode)
{
1740 1741
	struct fuse_inode *fi = get_fuse_inode(inode);

1742 1743
	inode->i_op = &fuse_dir_inode_operations;
	inode->i_fop = &fuse_dir_operations;
1744 1745 1746 1747 1748 1749

	spin_lock_init(&fi->rdc.lock);
	fi->rdc.cached = false;
	fi->rdc.size = 0;
	fi->rdc.pos = 0;
	fi->rdc.version = 0;
1750 1751
}

D
Dan Schatzberg 已提交
1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767
/*
 * ->readpage() for symlinks: fill @page with the link target using
 * fuse_readlink_page().  The page is marked up to date only on success
 * and is unlocked in either case.  The file argument is not used.
 */
static int fuse_symlink_readpage(struct file *null, struct page *page)
{
	struct inode *inode = page->mapping->host;
	int ret;

	ret = fuse_readlink_page(inode, page);
	if (ret == 0)
		SetPageUptodate(page);

	unlock_page(page);

	return ret;
}

/* Address space operations installed on symlinks by fuse_init_symlink(). */
static const struct address_space_operations fuse_symlink_aops = {
	.readpage = fuse_symlink_readpage,
};

1768 1769 1770
void fuse_init_symlink(struct inode *inode)
{
	inode->i_op = &fuse_symlink_inode_operations;
D
Dan Schatzberg 已提交
1771 1772
	inode->i_data.a_ops = &fuse_symlink_aops;
	inode_nohighmem(inode);
1773
}