dir.c 25.6 KB
Newer Older
1 2 3 4 5 6 7 8 9
/*
 * fs/kernfs/dir.c - kernfs directory implementation
 *
 * Copyright (c) 2001-3 Patrick Mochel
 * Copyright (c) 2007 SUSE Linux Products GmbH
 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
 *
 * This file is released under the GPLv2.
 */
10

11
#include <linux/sched.h>
12 13 14 15 16 17 18 19 20
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/idr.h>
#include <linux/slab.h>
#include <linux/security.h>
#include <linux/hash.h>

#include "kernfs-internal.h"

21
/*
 * File-wide mutex.  The sibling rbtree helpers and tree walkers in this
 * file either take it themselves or assert that the caller holds it.
 */
DEFINE_MUTEX(kernfs_mutex);
22

23
/* Map a struct rb_node pointer back to the kernfs_node embedding it as ->rb. */
#define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb)
24

25 26 27 28 29 30 31 32 33
/*
 * Tell whether lockdep annotations should be issued for @kn.  Without
 * CONFIG_DEBUG_LOCK_ALLOC this is always false; otherwise it reflects
 * the KERNFS_LOCKDEP bit in @kn->flags.
 */
static bool kernfs_lockdep(struct kernfs_node *kn)
{
#ifndef CONFIG_DEBUG_LOCK_ALLOC
	return false;
#else
	return kn->flags & KERNFS_LOCKDEP;
#endif
}

34
/**
 *	kernfs_name_hash
 *	@name: Null terminated string to hash
 *	@ns:   Namespace tag to hash
 *
 *	Returns 31 bit hash of ns + name (so it fits in an off_t ).
 *
 *	The values 0, 1 and INT_MAX are never returned: the directory
 *	iteration code in this file uses the hash as the readdir position,
 *	only seeks the rbtree for positions strictly between 1 and INT_MAX,
 *	and stores INT_MAX as the end-of-directory position.
 */
static unsigned int kernfs_name_hash(const char *name, const void *ns)
{
	unsigned long hash = init_name_hash();
	unsigned int len = strlen(name);

	while (len--)
		hash = partial_name_hash(*name++, hash);
	hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31));
	hash &= 0x7fffffffU;
	/*
	 * Reserve hash numbers 0, 1 and INT_MAX for magic directory entries.
	 * Both 0 and 1 must be remapped, hence "< 2" rather than "< 1".
	 */
	if (hash < 2)
		hash += 2;
	if (hash >= INT_MAX)
		hash = INT_MAX - 1;
	return hash;
}

57 58
static int kernfs_name_compare(unsigned int hash, const char *name,
			       const void *ns, const struct kernfs_node *kn)
59
{
60 61 62 63 64
	if (hash != kn->hash)
		return hash - kn->hash;
	if (ns != kn->ns)
		return ns - kn->ns;
	return strcmp(name, kn->name);
65 66
}

67 68
static int kernfs_sd_compare(const struct kernfs_node *left,
			     const struct kernfs_node *right)
69
{
70
	return kernfs_name_compare(left->hash, left->name, left->ns, right);
71 72 73
}

/**
74
 *	kernfs_link_sibling - link kernfs_node into sibling rbtree
75
 *	@kn: kernfs_node of interest
76
 *
77
 *	Link @kn into its sibling rbtree which starts from
78
 *	@kn->parent->dir.children.
79 80
 *
 *	Locking:
81
 *	mutex_lock(kernfs_mutex)
82 83 84 85
 *
 *	RETURNS:
 *	0 on susccess -EEXIST on failure.
 */
86
static int kernfs_link_sibling(struct kernfs_node *kn)
87
{
88
	struct rb_node **node = &kn->parent->dir.children.rb_node;
89 90
	struct rb_node *parent = NULL;

T
Tejun Heo 已提交
91
	if (kernfs_type(kn) == KERNFS_DIR)
92
		kn->parent->dir.subdirs++;
93 94

	while (*node) {
95
		struct kernfs_node *pos;
96 97
		int result;

98
		pos = rb_to_kn(*node);
99
		parent = *node;
100
		result = kernfs_sd_compare(kn, pos);
101
		if (result < 0)
102
			node = &pos->rb.rb_left;
103
		else if (result > 0)
104
			node = &pos->rb.rb_right;
105 106 107 108
		else
			return -EEXIST;
	}
	/* add new node and rebalance the tree */
109 110
	rb_link_node(&kn->rb, parent, node);
	rb_insert_color(&kn->rb, &kn->parent->dir.children);
111 112 113 114
	return 0;
}

/**
115
 *	kernfs_unlink_sibling - unlink kernfs_node from sibling rbtree
116
 *	@kn: kernfs_node of interest
117
 *
118
 *	Unlink @kn from its sibling rbtree which starts from
119
 *	kn->parent->dir.children.
120 121
 *
 *	Locking:
122
 *	mutex_lock(kernfs_mutex)
123
 */
124
static bool kernfs_unlink_sibling(struct kernfs_node *kn)
125
{
126 127 128
	if (RB_EMPTY_NODE(&kn->rb))
		return false;

T
Tejun Heo 已提交
129
	if (kernfs_type(kn) == KERNFS_DIR)
130
		kn->parent->dir.subdirs--;
131

132
	rb_erase(&kn->rb, &kn->parent->dir.children);
T
Tejun Heo 已提交
133
	RB_CLEAR_NODE(&kn->rb);
134
	return true;
135 136 137
}

/**
138
 *	kernfs_get_active - get an active reference to kernfs_node
139
 *	@kn: kernfs_node to get an active reference to
140
 *
141
 *	Get an active reference of @kn.  This function is noop if @kn
142 143 144
 *	is NULL.
 *
 *	RETURNS:
145
 *	Pointer to @kn on success, NULL on failure.
146
 */
147
struct kernfs_node *kernfs_get_active(struct kernfs_node *kn)
148
{
149
	if (unlikely(!kn))
150 151
		return NULL;

152
	if (kernfs_lockdep(kn))
153
		rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_);
154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170

	/*
	 * Try to obtain an active ref.  If @kn is deactivated, we block
	 * till either it's reactivated or killed.
	 */
	do {
		if (atomic_inc_unless_negative(&kn->active))
			return kn;

		wait_event(kernfs_root(kn)->deactivate_waitq,
			   atomic_read(&kn->active) >= 0 ||
			   RB_EMPTY_NODE(&kn->rb));
	} while (!RB_EMPTY_NODE(&kn->rb));

	if (kernfs_lockdep(kn))
		rwsem_release(&kn->dep_map, 1, _RET_IP_);
	return NULL;
171 172 173
}

/**
174
 *	kernfs_put_active - put an active reference to kernfs_node
175
 *	@kn: kernfs_node to put an active reference to
176
 *
177
 *	Put an active reference to @kn.  This function is noop if @kn
178 179
 *	is NULL.
 */
180
void kernfs_put_active(struct kernfs_node *kn)
181
{
182
	struct kernfs_root *root = kernfs_root(kn);
183 184
	int v;

185
	if (unlikely(!kn))
186 187
		return;

188
	if (kernfs_lockdep(kn))
189
		rwsem_release(&kn->dep_map, 1, _RET_IP_);
190
	v = atomic_dec_return(&kn->active);
T
Tejun Heo 已提交
191
	if (likely(v != KN_DEACTIVATED_BIAS))
192 193
		return;

194
	wake_up_all(&root->deactivate_waitq);
195 196 197
}

/**
T
Tejun Heo 已提交
198 199
 * kernfs_drain - drain kernfs_node
 * @kn: kernfs_node to drain
200
 *
201 202 203 204 205 206 207 208
 * Drain existing usages of @kn.  Mutiple removers may invoke this function
 * concurrently on @kn and all will return after draining is complete.
 * Returns %true if drain is performed and kernfs_mutex was temporarily
 * released.  %false if @kn was already drained and no operation was
 * necessary.
 *
 * The caller is responsible for ensuring @kn stays pinned while this
 * function is in progress even if it gets removed by someone else.
209
 */
210 211
static bool kernfs_drain(struct kernfs_node *kn)
	__releases(&kernfs_mutex) __acquires(&kernfs_mutex)
212
{
213
	struct kernfs_root *root = kernfs_root(kn);
214

215
	lockdep_assert_held(&kernfs_mutex);
T
Tejun Heo 已提交
216
	WARN_ON_ONCE(atomic_read(&kn->active) >= 0);
217

218 219 220 221 222 223 224 225 226 227 228 229 230 231 232
	/*
	 * We want to go through the active ref lockdep annotation at least
	 * once for all node removals, but the lockdep annotation can't be
	 * nested inside kernfs_mutex and deactivation can't make forward
	 * progress if we keep dropping the mutex.  Use JUST_ACTIVATED to
	 * force the slow path once for each deactivation if lockdep is
	 * enabled.
	 */
	if ((!kernfs_lockdep(kn) || !(kn->flags & KERNFS_JUST_DEACTIVATED)) &&
	    atomic_read(&kn->active) == KN_DEACTIVATED_BIAS)
		return false;

	kn->flags &= ~KERNFS_JUST_DEACTIVATED;
	mutex_unlock(&kernfs_mutex);

233 234 235 236 237
	if (kernfs_lockdep(kn)) {
		rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_);
		if (atomic_read(&kn->active) != KN_DEACTIVATED_BIAS)
			lock_contended(&kn->dep_map, _RET_IP_);
	}
238 239 240

	wait_event(root->deactivate_waitq,
		   atomic_read(&kn->active) == KN_DEACTIVATED_BIAS);
241

242 243 244 245
	if (kernfs_lockdep(kn)) {
		lock_acquired(&kn->dep_map, _RET_IP_);
		rwsem_release(&kn->dep_map, 1, _RET_IP_);
	}
246 247 248

	mutex_lock(&kernfs_mutex);
	return true;
249 250 251
}

/**
252 253
 * kernfs_get - get a reference count on a kernfs_node
 * @kn: the target kernfs_node
254
 */
255
void kernfs_get(struct kernfs_node *kn)
256
{
257
	if (kn) {
258 259
		WARN_ON(!atomic_read(&kn->count));
		atomic_inc(&kn->count);
260 261 262 263 264
	}
}
EXPORT_SYMBOL_GPL(kernfs_get);

/**
265 266
 * kernfs_put - put a reference count on a kernfs_node
 * @kn: the target kernfs_node
267
 *
268
 * Put a reference count of @kn and destroy it if it reached zero.
269
 */
270
void kernfs_put(struct kernfs_node *kn)
271
{
272
	struct kernfs_node *parent;
273
	struct kernfs_root *root;
274

275
	if (!kn || !atomic_dec_and_test(&kn->count))
276
		return;
277
	root = kernfs_root(kn);
278
 repeat:
T
Tejun Heo 已提交
279 280
	/*
	 * Moving/renaming is always done while holding reference.
281
	 * kn->parent won't change beneath us.
282
	 */
283
	parent = kn->parent;
284

T
Tejun Heo 已提交
285 286 287
	WARN_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS,
		  "kernfs_put: %s/%s: released with incorrect active_ref %d\n",
		  parent ? parent->name : "", kn->name, atomic_read(&kn->active));
288

T
Tejun Heo 已提交
289
	if (kernfs_type(kn) == KERNFS_LINK)
290
		kernfs_put(kn->symlink.target_kn);
291
	if (!(kn->flags & KERNFS_STATIC_NAME))
292 293 294 295 296 297
		kfree(kn->name);
	if (kn->iattr) {
		if (kn->iattr->ia_secdata)
			security_release_secctx(kn->iattr->ia_secdata,
						kn->iattr->ia_secdata_len);
		simple_xattrs_free(&kn->iattr->xattrs);
298
	}
299 300
	kfree(kn->iattr);
	ida_simple_remove(&root->ino_ida, kn->ino);
301
	kmem_cache_free(kernfs_node_cache, kn);
302

303 304
	kn = parent;
	if (kn) {
305
		if (atomic_dec_and_test(&kn->count))
306 307
			goto repeat;
	} else {
308
		/* just released the root kn, free @root too */
309
		ida_destroy(&root->ino_ida);
310 311
		kfree(root);
	}
312 313 314
}
EXPORT_SYMBOL_GPL(kernfs_put);

315
static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
316
{
317
	struct kernfs_node *kn;
318 319 320 321

	if (flags & LOOKUP_RCU)
		return -ECHILD;

T
Tejun Heo 已提交
322 323 324 325
	/* Always perform fresh lookup for negatives */
	if (!dentry->d_inode)
		goto out_bad_unlocked;

326
	kn = dentry->d_fsdata;
327
	mutex_lock(&kernfs_mutex);
328

T
Tejun Heo 已提交
329 330
	/* Force fresh lookup if removed */
	if (kn->parent && RB_EMPTY_NODE(&kn->rb))
331 332
		goto out_bad;

333
	/* The kernfs node has been moved? */
334
	if (dentry->d_parent->d_fsdata != kn->parent)
335 336
		goto out_bad;

337
	/* The kernfs node has been renamed */
338
	if (strcmp(dentry->d_name.name, kn->name) != 0)
339 340
		goto out_bad;

341
	/* The kernfs node has been moved to a different namespace */
342
	if (kn->parent && kernfs_ns_enabled(kn->parent) &&
343
	    kernfs_info(dentry->d_sb)->ns != kn->ns)
344 345
		goto out_bad;

346
	mutex_unlock(&kernfs_mutex);
347 348 349
out_valid:
	return 1;
out_bad:
350
	mutex_unlock(&kernfs_mutex);
T
Tejun Heo 已提交
351 352 353 354 355 356 357
out_bad_unlocked:
	/*
	 * @dentry doesn't match the underlying kernfs node, drop the
	 * dentry and force lookup.  If we have submounts we must allow the
	 * vfs caches to lie about the state of the filesystem to prevent
	 * leaks and other nasty things, so use check_submounts_and_drop()
	 * instead of d_drop().
358 359 360 361 362 363 364
	 */
	if (check_submounts_and_drop(dentry) != 0)
		goto out_valid;

	return 0;
}

365
static void kernfs_dop_release(struct dentry *dentry)
366 367 368 369
{
	kernfs_put(dentry->d_fsdata);
}

370
const struct dentry_operations kernfs_dops = {
371 372
	.d_revalidate	= kernfs_dop_revalidate,
	.d_release	= kernfs_dop_release,
373 374
};

375
struct kernfs_node *kernfs_new_node(struct kernfs_root *root, const char *name,
376
				    umode_t mode, unsigned flags)
377 378
{
	char *dup_name = NULL;
379
	struct kernfs_node *kn;
380
	int ret;
381

382
	if (!(flags & KERNFS_STATIC_NAME)) {
383 384 385 386 387
		name = dup_name = kstrdup(name, GFP_KERNEL);
		if (!name)
			return NULL;
	}

388
	kn = kmem_cache_zalloc(kernfs_node_cache, GFP_KERNEL);
389
	if (!kn)
390 391
		goto err_out1;

392 393
	ret = ida_simple_get(&root->ino_ida, 1, 0, GFP_KERNEL);
	if (ret < 0)
394
		goto err_out2;
395
	kn->ino = ret;
396

397
	atomic_set(&kn->count, 1);
T
Tejun Heo 已提交
398 399
	atomic_set(&kn->active, KN_DEACTIVATED_BIAS);
	RB_CLEAR_NODE(&kn->rb);
400

401 402
	kn->name = name;
	kn->mode = mode;
T
Tejun Heo 已提交
403
	kn->flags = flags;
404

405
	return kn;
406 407

 err_out2:
408
	kmem_cache_free(kernfs_node_cache, kn);
409 410 411 412 413 414
 err_out1:
	kfree(dup_name);
	return NULL;
}

/**
415
 *	kernfs_add_one - add kernfs_node to parent without warning
416 417
 *	@kn: kernfs_node to be added
 *	@parent: the parent kernfs_node to add @kn to
418
 *
419 420 421
 *	Get @parent and set @kn->parent to it and increment nlink of the
 *	parent inode if @kn is a directory and link into the children list
 *	of the parent.
422 423 424 425 426
 *
 *	RETURNS:
 *	0 on success, -EEXIST if entry with the given name already
 *	exists.
 */
T
Tejun Heo 已提交
427
int kernfs_add_one(struct kernfs_node *kn, struct kernfs_node *parent)
428
{
429
	struct kernfs_iattrs *ps_iattr;
T
Tejun Heo 已提交
430
	bool has_ns;
431 432
	int ret;

T
Tejun Heo 已提交
433 434
	if (!kernfs_get_active(parent))
		return -ENOENT;
T
Tejun Heo 已提交
435

T
Tejun Heo 已提交
436 437 438 439 440 441 442
	mutex_lock(&kernfs_mutex);

	ret = -EINVAL;
	has_ns = kernfs_ns_enabled(parent);
	if (WARN(has_ns != (bool)kn->ns, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
		 has_ns ? "required" : "invalid", parent->name, kn->name))
		goto out_unlock;
443

T
Tejun Heo 已提交
444
	if (kernfs_type(parent) != KERNFS_DIR)
T
Tejun Heo 已提交
445
		goto out_unlock;
446

447
	kn->hash = kernfs_name_hash(kn->name, kn->ns);
448
	kn->parent = parent;
449
	kernfs_get(parent);
450

451
	ret = kernfs_link_sibling(kn);
452
	if (ret)
T
Tejun Heo 已提交
453
		goto out_unlock;
454 455

	/* Update timestamps on the parent */
456
	ps_iattr = parent->iattr;
457 458 459 460 461 462
	if (ps_iattr) {
		struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
		ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
	}

	/* Mark the entry added into directory tree */
T
Tejun Heo 已提交
463
	atomic_sub(KN_DEACTIVATED_BIAS, &kn->active);
T
Tejun Heo 已提交
464 465
	ret = 0;
out_unlock:
466
	mutex_unlock(&kernfs_mutex);
T
Tejun Heo 已提交
467 468
	kernfs_put_active(parent);
	return ret;
469 470 471
}

/**
472 473
 * kernfs_find_ns - find kernfs_node with the given name
 * @parent: kernfs_node to search under
474 475 476
 * @name: name to look for
 * @ns: the namespace tag to use
 *
477 478
 * Look for kernfs_node with name @name under @parent.  Returns pointer to
 * the found kernfs_node on success, %NULL on failure.
479
 */
480 481 482
static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent,
					  const unsigned char *name,
					  const void *ns)
483
{
484
	struct rb_node *node = parent->dir.children.rb_node;
485
	bool has_ns = kernfs_ns_enabled(parent);
486 487
	unsigned int hash;

488
	lockdep_assert_held(&kernfs_mutex);
489 490

	if (has_ns != (bool)ns) {
491
		WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
492
		     has_ns ? "required" : "invalid", parent->name, name);
493 494 495
		return NULL;
	}

496
	hash = kernfs_name_hash(name, ns);
497
	while (node) {
498
		struct kernfs_node *kn;
499 500
		int result;

501
		kn = rb_to_kn(node);
502
		result = kernfs_name_compare(hash, name, ns, kn);
503 504 505 506 507
		if (result < 0)
			node = node->rb_left;
		else if (result > 0)
			node = node->rb_right;
		else
508
			return kn;
509 510 511 512 513
	}
	return NULL;
}

/**
514 515
 * kernfs_find_and_get_ns - find and get kernfs_node with the given name
 * @parent: kernfs_node to search under
516 517 518
 * @name: name to look for
 * @ns: the namespace tag to use
 *
519
 * Look for kernfs_node with name @name under @parent and get a reference
520
 * if found.  This function may sleep and returns pointer to the found
521
 * kernfs_node on success, %NULL on failure.
522
 */
523 524
struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent,
					   const char *name, const void *ns)
525
{
526
	struct kernfs_node *kn;
527

528
	mutex_lock(&kernfs_mutex);
529 530
	kn = kernfs_find_ns(parent, name, ns);
	kernfs_get(kn);
531
	mutex_unlock(&kernfs_mutex);
532

533
	return kn;
534 535 536
}
EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns);

537 538
/**
 * kernfs_create_root - create a new kernfs hierarchy
T
Tejun Heo 已提交
539
 * @kdops: optional directory syscall operations for the hierarchy
540 541 542 543 544
 * @priv: opaque data associated with the new directory
 *
 * Returns the root of the new hierarchy on success, ERR_PTR() value on
 * failure.
 */
T
Tejun Heo 已提交
545
struct kernfs_root *kernfs_create_root(struct kernfs_dir_ops *kdops, void *priv)
546 547
{
	struct kernfs_root *root;
548
	struct kernfs_node *kn;
549 550 551 552 553

	root = kzalloc(sizeof(*root), GFP_KERNEL);
	if (!root)
		return ERR_PTR(-ENOMEM);

554 555
	ida_init(&root->ino_ida);

556
	kn = kernfs_new_node(root, "", S_IFDIR | S_IRUGO | S_IXUGO, KERNFS_DIR);
557
	if (!kn) {
558
		ida_destroy(&root->ino_ida);
559 560 561 562
		kfree(root);
		return ERR_PTR(-ENOMEM);
	}

T
Tejun Heo 已提交
563
	atomic_sub(KN_DEACTIVATED_BIAS, &kn->active);
564
	kn->priv = priv;
565
	kn->dir.root = root;
566

T
Tejun Heo 已提交
567
	root->dir_ops = kdops;
568
	root->kn = kn;
569
	init_waitqueue_head(&root->deactivate_waitq);
570 571 572 573 574 575 576 577 578 579 580 581 582

	return root;
}

/**
 * kernfs_destroy_root - destroy a kernfs hierarchy
 * @root: root of the hierarchy to destroy
 *
 * Destroy the hierarchy anchored at @root by removing all existing
 * directories and destroying @root.
 */
void kernfs_destroy_root(struct kernfs_root *root)
{
583
	kernfs_remove(root->kn);	/* will also free @root */
584 585
}

586 587 588 589
/**
 * kernfs_create_dir_ns - create a directory
 * @parent: parent in which to create a new directory
 * @name: name of the new directory
590
 * @mode: mode of the new directory
591 592 593 594 595
 * @priv: opaque data associated with the new directory
 * @ns: optional namespace tag of the directory
 *
 * Returns the created node on success, ERR_PTR() value on failure.
 */
596
struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
597 598
					 const char *name, umode_t mode,
					 void *priv, const void *ns)
599
{
600
	struct kernfs_node *kn;
601 602 603
	int rc;

	/* allocate */
604 605
	kn = kernfs_new_node(kernfs_root(parent), name, mode | S_IFDIR,
			     KERNFS_DIR);
606
	if (!kn)
607 608
		return ERR_PTR(-ENOMEM);

609 610
	kn->dir.root = parent->dir.root;
	kn->ns = ns;
611
	kn->priv = priv;
612 613

	/* link in */
T
Tejun Heo 已提交
614
	rc = kernfs_add_one(kn, parent);
615
	if (!rc)
616
		return kn;
617

618
	kernfs_put(kn);
619 620 621
	return ERR_PTR(rc);
}

622 623 624
static struct dentry *kernfs_iop_lookup(struct inode *dir,
					struct dentry *dentry,
					unsigned int flags)
625
{
T
Tejun Heo 已提交
626
	struct dentry *ret;
627 628
	struct kernfs_node *parent = dentry->d_parent->d_fsdata;
	struct kernfs_node *kn;
629 630 631
	struct inode *inode;
	const void *ns = NULL;

632
	mutex_lock(&kernfs_mutex);
633

634
	if (kernfs_ns_enabled(parent))
635
		ns = kernfs_info(dir->i_sb)->ns;
636

637
	kn = kernfs_find_ns(parent, dentry->d_name.name, ns);
638 639

	/* no such entry */
640
	if (!kn) {
T
Tejun Heo 已提交
641
		ret = NULL;
642 643
		goto out_unlock;
	}
644 645
	kernfs_get(kn);
	dentry->d_fsdata = kn;
646 647

	/* attach dentry and inode */
648
	inode = kernfs_get_inode(dir->i_sb, kn);
649 650 651 652 653 654 655 656
	if (!inode) {
		ret = ERR_PTR(-ENOMEM);
		goto out_unlock;
	}

	/* instantiate and hash dentry */
	ret = d_materialise_unique(dentry, inode);
 out_unlock:
657
	mutex_unlock(&kernfs_mutex);
658 659 660
	return ret;
}

T
Tejun Heo 已提交
661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696
/*
 * ->mkdir: forward to the hierarchy's mkdir callback, if one is set.
 * Returns -EPERM when the root has no dir_ops or no mkdir handler.
 */
static int kernfs_iop_mkdir(struct inode *dir, struct dentry *dentry,
			    umode_t mode)
{
	struct kernfs_node *parent = dir->i_private;
	struct kernfs_dir_ops *kdops = kernfs_root(parent)->dir_ops;

	if (kdops && kdops->mkdir)
		return kdops->mkdir(parent, dentry->d_name.name, mode);

	return -EPERM;
}

/*
 * ->rmdir: forward to the hierarchy's rmdir callback, if one is set.
 * Returns -EPERM when the root has no dir_ops or no rmdir handler.
 */
static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry)
{
	struct kernfs_node *kn  = dentry->d_fsdata;
	struct kernfs_dir_ops *kdops = kernfs_root(kn)->dir_ops;

	if (kdops && kdops->rmdir)
		return kdops->rmdir(kn);

	return -EPERM;
}

/*
 * ->rename: forward to the hierarchy's rename callback, if one is set.
 * Returns -EPERM when the root has no dir_ops or no rename handler.
 */
static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry,
			     struct inode *new_dir, struct dentry *new_dentry)
{
	struct kernfs_node *kn  = old_dentry->d_fsdata;
	struct kernfs_node *new_parent = new_dir->i_private;
	struct kernfs_dir_ops *kdops = kernfs_root(kn)->dir_ops;

	if (kdops && kdops->rename)
		return kdops->rename(kn, new_parent, new_dentry->d_name.name);

	return -EPERM;
}

697
const struct inode_operations kernfs_dir_iops = {
698 699 700 701 702 703 704 705
	.lookup		= kernfs_iop_lookup,
	.permission	= kernfs_iop_permission,
	.setattr	= kernfs_iop_setattr,
	.getattr	= kernfs_iop_getattr,
	.setxattr	= kernfs_iop_setxattr,
	.removexattr	= kernfs_iop_removexattr,
	.getxattr	= kernfs_iop_getxattr,
	.listxattr	= kernfs_iop_listxattr,
T
Tejun Heo 已提交
706 707 708 709

	.mkdir		= kernfs_iop_mkdir,
	.rmdir		= kernfs_iop_rmdir,
	.rename		= kernfs_iop_rename,
710 711
};

712
static struct kernfs_node *kernfs_leftmost_descendant(struct kernfs_node *pos)
713
{
714
	struct kernfs_node *last;
715 716 717 718 719 720

	while (true) {
		struct rb_node *rbn;

		last = pos;

T
Tejun Heo 已提交
721
		if (kernfs_type(pos) != KERNFS_DIR)
722 723
			break;

724
		rbn = rb_first(&pos->dir.children);
725 726 727
		if (!rbn)
			break;

728
		pos = rb_to_kn(rbn);
729 730 731 732 733 734
	}

	return last;
}

/**
735
 * kernfs_next_descendant_post - find the next descendant for post-order walk
736
 * @pos: the current position (%NULL to initiate traversal)
737
 * @root: kernfs_node whose descendants to walk
738 739 740 741 742
 *
 * Find the next descendant to visit for post-order traversal of @root's
 * descendants.  @root is included in the iteration and the last node to be
 * visited.
 */
743 744
static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
						       struct kernfs_node *root)
745 746 747
{
	struct rb_node *rbn;

748
	lockdep_assert_held(&kernfs_mutex);
749 750 751

	/* if first iteration, visit leftmost descendant which may be root */
	if (!pos)
752
		return kernfs_leftmost_descendant(root);
753 754 755 756 757 758

	/* if we visited @root, we're done */
	if (pos == root)
		return NULL;

	/* if there's an unvisited sibling, visit its leftmost descendant */
759
	rbn = rb_next(&pos->rb);
760
	if (rbn)
761
		return kernfs_leftmost_descendant(rb_to_kn(rbn));
762 763

	/* no sibling left, visit parent */
764
	return pos->parent;
765 766
}

767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799
/*
 * Deactivate and drain @kn and all of its descendants.
 *
 * Runs two post-order walks over the subtree rooted at @kn.  The first
 * adds KN_DEACTIVATED_BIAS to the active count of every node whose count
 * is still non-negative and flags it KERNFS_JUST_DEACTIVATED.  The second
 * calls kernfs_drain() on every node.
 *
 * Must be called with kernfs_mutex held; the mutex may be dropped and
 * re-acquired while draining.
 */
static void __kernfs_deactivate(struct kernfs_node *kn)
{
	struct kernfs_node *pos;

	lockdep_assert_held(&kernfs_mutex);

	/* prevent any new usage under @kn by deactivating all nodes */
	pos = NULL;
	while ((pos = kernfs_next_descendant_post(pos, kn))) {
		/* nodes with a negative count were already deactivated, skip */
		if (atomic_read(&pos->active) >= 0) {
			atomic_add(KN_DEACTIVATED_BIAS, &pos->active);
			pos->flags |= KERNFS_JUST_DEACTIVATED;
		}
	}

	/*
	 * Drain the subtree.  If kernfs_drain() blocked to drain, which is
	 * indicated by %true return, it temporarily released kernfs_mutex
	 * and the rbtree might have been modified inbetween breaking our
	 * future walk.  Restart the walk after each %true return.
	 */
	pos = NULL;
	while ((pos = kernfs_next_descendant_post(pos, kn))) {
		bool drained;

		/* pin @pos while kernfs_drain() may drop kernfs_mutex */
		kernfs_get(pos);
		drained = kernfs_drain(pos);
		kernfs_put(pos);
		if (drained)
			pos = NULL;
	}
}

T
Tejun Heo 已提交
800
static void __kernfs_remove(struct kernfs_node *kn)
801
{
802
	struct kernfs_root *root = kernfs_root(kn);
803 804 805
	struct kernfs_node *pos;

	lockdep_assert_held(&kernfs_mutex);
806

807
	if (!kn)
808 809
		return;

810
	pr_debug("kernfs %s: removing\n", kn->name);
811

812 813 814
	__kernfs_deactivate(kn);

	/* unlink the subtree node-by-node */
815
	do {
816 817
		pos = kernfs_leftmost_descendant(kn);

818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841
		/*
		 * We're gonna release kernfs_mutex to unmap bin files,
		 * Make sure @pos doesn't go away inbetween.
		 */
		kernfs_get(pos);

		/*
		 * This must be come before unlinking; otherwise, when
		 * there are multiple removers, some may finish before
		 * unmapping is complete.
		 */
		if (pos->flags & KERNFS_HAS_MMAP) {
			mutex_unlock(&kernfs_mutex);
			kernfs_unmap_file(pos);
			mutex_lock(&kernfs_mutex);
		}

		/*
		 * kernfs_unlink_sibling() succeeds once per node.  Use it
		 * to decide who's responsible for cleanups.
		 */
		if (!pos->parent || kernfs_unlink_sibling(pos)) {
			struct kernfs_iattrs *ps_iattr =
				pos->parent ? pos->parent->iattr : NULL;
842 843 844 845 846 847

			/* update timestamps on the parent */
			if (ps_iattr) {
				ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME;
				ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME;
			}
848

T
Tejun Heo 已提交
849
			kernfs_put(pos);
850 851
		}

852
		kernfs_put(pos);
853
	} while (pos != kn);
854 855 856

	/* some nodes killed, kick get_active waiters */
	wake_up_all(&root->deactivate_waitq);
857 858 859
}

/**
860 861
 * kernfs_remove - remove a kernfs_node recursively
 * @kn: the kernfs_node to remove
862
 *
863
 * Remove @kn along with all its subdirectories and files.
864
 */
865
void kernfs_remove(struct kernfs_node *kn)
866
{
T
Tejun Heo 已提交
867 868 869
	mutex_lock(&kernfs_mutex);
	__kernfs_remove(kn);
	mutex_unlock(&kernfs_mutex);
870 871 872
}

/**
873 874 875 876
 * kernfs_remove_by_name_ns - find a kernfs_node by name and remove it
 * @parent: parent of the target
 * @name: name of the kernfs_node to remove
 * @ns: namespace tag of the kernfs_node to remove
877
 *
878 879
 * Look for the kernfs_node with @name and @ns under @parent and remove it.
 * Returns 0 on success, -ENOENT if such entry doesn't exist.
880
 */
881
int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
882 883
			     const void *ns)
{
884
	struct kernfs_node *kn;
885

886
	if (!parent) {
887
		WARN(1, KERN_WARNING "kernfs: can not remove '%s', no directory\n",
888 889 890 891
			name);
		return -ENOENT;
	}

T
Tejun Heo 已提交
892
	mutex_lock(&kernfs_mutex);
893

894 895
	kn = kernfs_find_ns(parent, name, ns);
	if (kn)
T
Tejun Heo 已提交
896
		__kernfs_remove(kn);
897

T
Tejun Heo 已提交
898
	mutex_unlock(&kernfs_mutex);
899

900
	if (kn)
901 902 903 904 905 906 907
		return 0;
	else
		return -ENOENT;
}

/**
 * kernfs_rename_ns - move and rename a kernfs_node
908
 * @kn: target node
909 910 911 912
 * @new_parent: new parent to put @sd under
 * @new_name: new name
 * @new_ns: new namespace tag
 */
913
int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
914 915 916 917
		     const char *new_name, const void *new_ns)
{
	int error;

918
	error = -ENOENT;
T
Tejun Heo 已提交
919
	if (!kernfs_get_active(new_parent))
920
		goto out;
T
Tejun Heo 已提交
921 922 923 924
	if (!kernfs_get_active(kn))
		goto out_put_new_parent;

	mutex_lock(&kernfs_mutex);
925

926
	error = 0;
927 928
	if ((kn->parent == new_parent) && (kn->ns == new_ns) &&
	    (strcmp(kn->name, new_name) == 0))
T
Tejun Heo 已提交
929
		goto out_unlock;	/* nothing to rename */
930 931 932

	error = -EEXIST;
	if (kernfs_find_ns(new_parent, new_name, new_ns))
T
Tejun Heo 已提交
933
		goto out_unlock;
934

935
	/* rename kernfs_node */
936
	if (strcmp(kn->name, new_name) != 0) {
937 938 939
		error = -ENOMEM;
		new_name = kstrdup(new_name, GFP_KERNEL);
		if (!new_name)
T
Tejun Heo 已提交
940
			goto out_unlock;
941

942 943 944 945 946
		if (kn->flags & KERNFS_STATIC_NAME)
			kn->flags &= ~KERNFS_STATIC_NAME;
		else
			kfree(kn->name);

947
		kn->name = new_name;
948 949 950 951 952
	}

	/*
	 * Move to the appropriate place in the appropriate directories rbtree.
	 */
953
	kernfs_unlink_sibling(kn);
954
	kernfs_get(new_parent);
955 956
	kernfs_put(kn->parent);
	kn->ns = new_ns;
957
	kn->hash = kernfs_name_hash(kn->name, kn->ns);
958
	kn->parent = new_parent;
959
	kernfs_link_sibling(kn);
960 961

	error = 0;
T
Tejun Heo 已提交
962
out_unlock:
963
	mutex_unlock(&kernfs_mutex);
T
Tejun Heo 已提交
964 965 966 967
	kernfs_put_active(kn);
out_put_new_parent:
	kernfs_put_active(new_parent);
out:
968 969 970 971
	return error;
}

/* Relationship between s_mode and the DT_xxx types */
972
static inline unsigned char dt_type(struct kernfs_node *kn)
973
{
974
	return (kn->mode >> 12) & 15;
975 976
}

977
static int kernfs_dir_fop_release(struct inode *inode, struct file *filp)
978 979 980 981 982
{
	kernfs_put(filp->private_data);
	return 0;
}

983
/*
 * Resolve the readdir position @hash under @parent to a node in
 * namespace @ns.
 *
 * @pos is the node remembered from the previous call, if any; its
 * reference is put here and it is reused only if it still belongs to
 * @parent and carries @hash.  Otherwise the sibling rbtree is searched by
 * hash (only for hashes strictly between 1 and INT_MAX).  Entries whose
 * namespace differs from @ns are then skipped in rbtree order.
 *
 * Returns the node to emit next or NULL if there is none.
 */
static struct kernfs_node *kernfs_dir_pos(const void *ns,
	struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos)
{
	if (pos) {
		int still_valid = pos->parent == parent && hash == pos->hash;

		kernfs_put(pos);
		if (!still_valid)
			pos = NULL;
	}

	if (!pos && (hash > 1) && (hash < INT_MAX)) {
		struct rb_node *node;

		/* on a miss @pos is left at the last node visited */
		for (node = parent->dir.children.rb_node; node; ) {
			pos = rb_to_kn(node);

			if (hash < pos->hash)
				node = node->rb_left;
			else if (hash > pos->hash)
				node = node->rb_right;
			else
				break;
		}
	}

	/* Skip over entries in the wrong namespace */
	while (pos && pos->ns != ns) {
		struct rb_node *next = rb_next(&pos->rb);

		pos = next ? rb_to_kn(next) : NULL;
	}

	return pos;
}

1016
/*
 * Advance the readdir cursor: re-resolve the current position through
 * kernfs_dir_pos() and then step to the next sibling in rbtree order
 * whose namespace matches @ns.  Returns NULL when there is none.
 */
static struct kernfs_node *kernfs_dir_next_pos(const void *ns,
	struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos)
{
	pos = kernfs_dir_pos(ns, parent, ino, pos);
	if (!pos)
		return NULL;

	do {
		struct rb_node *next = rb_next(&pos->rb);

		pos = next ? rb_to_kn(next) : NULL;
	} while (pos && pos->ns != ns);

	return pos;
}

1031
static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
1032 1033
{
	struct dentry *dentry = file->f_path.dentry;
1034 1035
	struct kernfs_node *parent = dentry->d_fsdata;
	struct kernfs_node *pos = file->private_data;
1036 1037 1038 1039
	const void *ns = NULL;

	if (!dir_emit_dots(file, ctx))
		return 0;
1040
	mutex_lock(&kernfs_mutex);
1041

1042
	if (kernfs_ns_enabled(parent))
1043
		ns = kernfs_info(dentry->d_sb)->ns;
1044

1045
	for (pos = kernfs_dir_pos(ns, parent, ctx->pos, pos);
1046
	     pos;
1047
	     pos = kernfs_dir_next_pos(ns, parent, ctx->pos, pos)) {
1048
		const char *name = pos->name;
1049 1050
		unsigned int type = dt_type(pos);
		int len = strlen(name);
1051
		ino_t ino = pos->ino;
1052

1053
		ctx->pos = pos->hash;
1054 1055 1056
		file->private_data = pos;
		kernfs_get(pos);

1057
		mutex_unlock(&kernfs_mutex);
1058 1059
		if (!dir_emit(ctx, name, len, ino, type))
			return 0;
1060
		mutex_lock(&kernfs_mutex);
1061
	}
1062
	mutex_unlock(&kernfs_mutex);
1063 1064 1065 1066 1067
	file->private_data = NULL;
	ctx->pos = INT_MAX;
	return 0;
}

1068 1069
/*
 * ->llseek for directories: generic_file_llseek() run under the inode's
 * i_mutex.  Returns whatever generic_file_llseek() returns.
 */
static loff_t kernfs_dir_fop_llseek(struct file *file, loff_t offset,
				    int whence)
{
	struct inode *dir_inode = file_inode(file);
	loff_t new_pos;

	mutex_lock(&dir_inode->i_mutex);
	new_pos = generic_file_llseek(file, offset, whence);
	mutex_unlock(&dir_inode->i_mutex);

	return new_pos;
}

1081
const struct file_operations kernfs_dir_fops = {
1082
	.read		= generic_read_dir,
1083 1084 1085
	.iterate	= kernfs_fop_readdir,
	.release	= kernfs_dir_fop_release,
	.llseek		= kernfs_dir_fop_llseek,
1086
};