dir.c 25.0 KB
Newer Older
1 2 3 4 5 6 7 8 9
/*
 * fs/kernfs/dir.c - kernfs directory implementation
 *
 * Copyright (c) 2001-3 Patrick Mochel
 * Copyright (c) 2007 SUSE Linux Products GmbH
 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
 *
 * This file is released under the GPLv2.
 */
10 11 12 13 14 15 16 17 18 19

#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/idr.h>
#include <linux/slab.h>
#include <linux/security.h>
#include <linux/hash.h>

#include "kernfs-internal.h"

20
/*
 * Global mutex of this file.  It is taken by kernfs_addrm_start() for
 * add/remove and directly by the lookup, revalidate, rename and readdir
 * paths below.
 */
DEFINE_MUTEX(kernfs_mutex);
21

22
/* Map a struct rb_node pointer to the struct kernfs_node holding it as ->rb. */
#define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb)
23 24

/**
 *	kernfs_name_hash
 *	@name: Null terminated string to hash
 *	@ns:   Namespace tag to hash
 *
 *	Returns 31 bit hash of ns + name (so it fits in an off_t ).  The
 *	values 0, 1 and INT_MAX are never returned; they are remapped so
 *	that they stay free for the magic directory positions.
 */
static unsigned int kernfs_name_hash(const char *name, const void *ns)
{
	unsigned long hash = init_name_hash();
	unsigned int len = strlen(name);

	while (len--)
		hash = partial_name_hash(*name++, hash);
	hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31));
	hash &= 0x7fffffffU;
	/*
	 * Reserve hash numbers 0, 1 and INT_MAX for magic directory entries.
	 * Both 0 and 1 must be moved out of the way: kernfs_dir_pos() only
	 * looks up positions with 1 < hash < INT_MAX, so a node hashed to 1
	 * could not be located again by its position.
	 */
	if (hash < 2)
		hash += 2;
	if (hash >= INT_MAX)
		hash = INT_MAX - 1;
	return hash;
}

47 48
static int kernfs_name_compare(unsigned int hash, const char *name,
			       const void *ns, const struct kernfs_node *kn)
49
{
50 51 52 53 54
	if (hash != kn->hash)
		return hash - kn->hash;
	if (ns != kn->ns)
		return ns - kn->ns;
	return strcmp(name, kn->name);
55 56
}

57 58
static int kernfs_sd_compare(const struct kernfs_node *left,
			     const struct kernfs_node *right)
59
{
60
	return kernfs_name_compare(left->hash, left->name, left->ns, right);
61 62 63
}

/**
64
 *	kernfs_link_sibling - link kernfs_node into sibling rbtree
65
 *	@kn: kernfs_node of interest
66
 *
67
 *	Link @kn into its sibling rbtree which starts from
68
 *	@kn->parent->dir.children.
69 70
 *
 *	Locking:
71
 *	mutex_lock(kernfs_mutex)
72 73 74 75
 *
 *	RETURNS:
 *	0 on susccess -EEXIST on failure.
 */
76
static int kernfs_link_sibling(struct kernfs_node *kn)
77
{
78
	struct rb_node **node = &kn->parent->dir.children.rb_node;
79 80
	struct rb_node *parent = NULL;

T
Tejun Heo 已提交
81
	if (kernfs_type(kn) == KERNFS_DIR)
82
		kn->parent->dir.subdirs++;
83 84

	while (*node) {
85
		struct kernfs_node *pos;
86 87
		int result;

88
		pos = rb_to_kn(*node);
89
		parent = *node;
90
		result = kernfs_sd_compare(kn, pos);
91
		if (result < 0)
92
			node = &pos->rb.rb_left;
93
		else if (result > 0)
94
			node = &pos->rb.rb_right;
95 96 97 98
		else
			return -EEXIST;
	}
	/* add new node and rebalance the tree */
99 100
	rb_link_node(&kn->rb, parent, node);
	rb_insert_color(&kn->rb, &kn->parent->dir.children);
101 102 103 104
	return 0;
}

/**
105
 *	kernfs_unlink_sibling - unlink kernfs_node from sibling rbtree
106
 *	@kn: kernfs_node of interest
107
 *
108
 *	Unlink @kn from its sibling rbtree which starts from
109
 *	kn->parent->dir.children.
110 111
 *
 *	Locking:
112
 *	mutex_lock(kernfs_mutex)
113
 */
114
static void kernfs_unlink_sibling(struct kernfs_node *kn)
115
{
T
Tejun Heo 已提交
116
	if (kernfs_type(kn) == KERNFS_DIR)
117
		kn->parent->dir.subdirs--;
118

119
	rb_erase(&kn->rb, &kn->parent->dir.children);
120 121 122
}

/**
123
 *	kernfs_get_active - get an active reference to kernfs_node
124
 *	@kn: kernfs_node to get an active reference to
125
 *
126
 *	Get an active reference of @kn.  This function is noop if @kn
127 128 129
 *	is NULL.
 *
 *	RETURNS:
130
 *	Pointer to @kn on success, NULL on failure.
131
 */
132
struct kernfs_node *kernfs_get_active(struct kernfs_node *kn)
133
{
134
	if (unlikely(!kn))
135 136
		return NULL;

137 138
	if (!atomic_inc_unless_negative(&kn->active))
		return NULL;
139

140
	if (kn->flags & KERNFS_LOCKDEP)
141 142
		rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_);
	return kn;
143 144 145
}

/**
146
 *	kernfs_put_active - put an active reference to kernfs_node
147
 *	@kn: kernfs_node to put an active reference to
148
 *
149
 *	Put an active reference to @kn.  This function is noop if @kn
150 151
 *	is NULL.
 */
152
void kernfs_put_active(struct kernfs_node *kn)
153 154 155
{
	int v;

156
	if (unlikely(!kn))
157 158
		return;

159
	if (kn->flags & KERNFS_LOCKDEP)
160
		rwsem_release(&kn->dep_map, 1, _RET_IP_);
161
	v = atomic_dec_return(&kn->active);
T
Tejun Heo 已提交
162
	if (likely(v != KN_DEACTIVATED_BIAS))
163 164
		return;

165 166 167 168 169
	/*
	 * atomic_dec_return() is a mb(), we'll always see the updated
	 * kn->u.completion.
	 */
	complete(kn->u.completion);
170 171 172
}

/**
173 174
 *	kernfs_deactivate - deactivate kernfs_node
 *	@kn: kernfs_node to deactivate
175
 *
176
 *	Deny new active references and drain existing ones.
177
 */
178
static void kernfs_deactivate(struct kernfs_node *kn)
179
{
180 181
	DECLARE_COMPLETION_ONSTACK(wait);
	int v;
182

183 184
	BUG_ON(!(kn->flags & KERNFS_REMOVED));

185 186 187
	if (!(kernfs_type(kn) & KERNFS_ACTIVE_REF))
		return;

188
	kn->u.completion = (void *)&wait;
189

190 191
	if (kn->flags & KERNFS_LOCKDEP)
		rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_);
192 193 194 195
	/* atomic_add_return() is a mb(), put_active() will always see
	 * the updated kn->u.completion.
	 */
	v = atomic_add_return(KN_DEACTIVATED_BIAS, &kn->active);
196

197
	if (v != KN_DEACTIVATED_BIAS) {
198 199
		if (kn->flags & KERNFS_LOCKDEP)
			lock_contended(&kn->dep_map, _RET_IP_);
200 201
		wait_for_completion(&wait);
	}
202

203 204 205 206
	if (kn->flags & KERNFS_LOCKDEP) {
		lock_acquired(&kn->dep_map, _RET_IP_);
		rwsem_release(&kn->dep_map, 1, _RET_IP_);
	}
207 208 209
}

/**
210 211
 * kernfs_get - get a reference count on a kernfs_node
 * @kn: the target kernfs_node
212
 */
213
void kernfs_get(struct kernfs_node *kn)
214
{
215
	if (kn) {
216 217
		WARN_ON(!atomic_read(&kn->count));
		atomic_inc(&kn->count);
218 219 220 221 222
	}
}
EXPORT_SYMBOL_GPL(kernfs_get);

/**
223 224
 * kernfs_put - put a reference count on a kernfs_node
 * @kn: the target kernfs_node
225
 *
226
 * Put a reference count of @kn and destroy it if it reached zero.
227
 */
228
void kernfs_put(struct kernfs_node *kn)
229
{
230
	struct kernfs_node *parent;
231
	struct kernfs_root *root;
232

233
	if (!kn || !atomic_dec_and_test(&kn->count))
234
		return;
235
	root = kernfs_root(kn);
236
 repeat:
237
	/* Moving/renaming is always done while holding reference.
238
	 * kn->parent won't change beneath us.
239
	 */
240
	parent = kn->parent;
241

242 243
	WARN(!(kn->flags & KERNFS_REMOVED), "kernfs: free using entry: %s/%s\n",
	     parent ? parent->name : "", kn->name);
244

T
Tejun Heo 已提交
245
	if (kernfs_type(kn) == KERNFS_LINK)
246
		kernfs_put(kn->symlink.target_kn);
247
	if (!(kn->flags & KERNFS_STATIC_NAME))
248 249 250 251 252 253
		kfree(kn->name);
	if (kn->iattr) {
		if (kn->iattr->ia_secdata)
			security_release_secctx(kn->iattr->ia_secdata,
						kn->iattr->ia_secdata_len);
		simple_xattrs_free(&kn->iattr->xattrs);
254
	}
255 256
	kfree(kn->iattr);
	ida_simple_remove(&root->ino_ida, kn->ino);
257
	kmem_cache_free(kernfs_node_cache, kn);
258

259 260
	kn = parent;
	if (kn) {
261
		if (atomic_dec_and_test(&kn->count))
262 263
			goto repeat;
	} else {
264
		/* just released the root kn, free @root too */
265
		ida_destroy(&root->ino_ida);
266 267
		kfree(root);
	}
268 269 270
}
EXPORT_SYMBOL_GPL(kernfs_put);

271
static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
272
{
273
	struct kernfs_node *kn;
274 275 276 277

	if (flags & LOOKUP_RCU)
		return -ECHILD;

T
Tejun Heo 已提交
278 279 280 281
	/* Always perform fresh lookup for negatives */
	if (!dentry->d_inode)
		goto out_bad_unlocked;

282
	kn = dentry->d_fsdata;
283
	mutex_lock(&kernfs_mutex);
284

285 286
	/* The kernfs node has been deleted */
	if (kn->flags & KERNFS_REMOVED)
287 288
		goto out_bad;

289
	/* The kernfs node has been moved? */
290
	if (dentry->d_parent->d_fsdata != kn->parent)
291 292
		goto out_bad;

293
	/* The kernfs node has been renamed */
294
	if (strcmp(dentry->d_name.name, kn->name) != 0)
295 296
		goto out_bad;

297
	/* The kernfs node has been moved to a different namespace */
298
	if (kn->parent && kernfs_ns_enabled(kn->parent) &&
299
	    kernfs_info(dentry->d_sb)->ns != kn->ns)
300 301
		goto out_bad;

302
	mutex_unlock(&kernfs_mutex);
303 304 305
out_valid:
	return 1;
out_bad:
306
	mutex_unlock(&kernfs_mutex);
T
Tejun Heo 已提交
307 308 309 310 311 312 313
out_bad_unlocked:
	/*
	 * @dentry doesn't match the underlying kernfs node, drop the
	 * dentry and force lookup.  If we have submounts we must allow the
	 * vfs caches to lie about the state of the filesystem to prevent
	 * leaks and other nasty things, so use check_submounts_and_drop()
	 * instead of d_drop().
314 315 316 317 318 319 320
	 */
	if (check_submounts_and_drop(dentry) != 0)
		goto out_valid;

	return 0;
}

321
static void kernfs_dop_release(struct dentry *dentry)
322 323 324 325
{
	kernfs_put(dentry->d_fsdata);
}

326
const struct dentry_operations kernfs_dops = {
327 328
	.d_revalidate	= kernfs_dop_revalidate,
	.d_release	= kernfs_dop_release,
329 330
};

331 332 333
/*
 * Allocate and initialise a kernfs_node for @root.
 *
 * Unless KERNFS_STATIC_NAME is set in @flags, @name is duplicated.  The
 * node starts with one reference, no active references and with
 * KERNFS_REMOVED set.  Returns NULL if any allocation fails.
 */
static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
					     const char *name, umode_t mode,
					     unsigned flags)
{
	struct kernfs_node *kn;
	char *name_copy = NULL;
	int ino;

	if (!(flags & KERNFS_STATIC_NAME)) {
		name_copy = kstrdup(name, GFP_KERNEL);
		if (!name_copy)
			return NULL;
		name = name_copy;
	}

	kn = kmem_cache_zalloc(kernfs_node_cache, GFP_KERNEL);
	if (!kn)
		goto free_name;

	ino = ida_simple_get(&root->ino_ida, 1, 0, GFP_KERNEL);
	if (ino < 0)
		goto free_node;

	kn->ino = ino;
	kn->name = name;
	kn->mode = mode;
	kn->flags = flags | KERNFS_REMOVED;
	atomic_set(&kn->count, 1);
	atomic_set(&kn->active, 0);

	return kn;

 free_node:
	kmem_cache_free(kernfs_node_cache, kn);
 free_name:
	kfree(name_copy);
	return NULL;
}

370 371 372 373 374 375 376 377 378 379 380 381 382 383
/*
 * Allocate a node in @parent's hierarchy and point it at @parent.  A
 * reference on @parent is taken on behalf of the new node.  Returns NULL
 * on allocation failure.
 */
struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
				    const char *name, umode_t mode,
				    unsigned flags)
{
	struct kernfs_node *kn =
		__kernfs_new_node(kernfs_root(parent), name, mode, flags);

	if (!kn)
		return NULL;

	kernfs_get(parent);
	kn->parent = parent;
	return kn;
}

384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403
/**
 *	kernfs_addrm_start - prepare for kernfs_node add/remove
 *	@acxt: pointer to kernfs_addrm_cxt to be used
 *
 *	Called before adding or removing kernfs_nodes.  @acxt is cleared
 *	and kernfs_mutex is acquired; the same @acxt is then handed to the
 *	other addrm functions.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).  kernfs_mutex is locked on
 *	return.
 */
void kernfs_addrm_start(struct kernfs_addrm_cxt *acxt)
	__acquires(kernfs_mutex)
{
	/* start from an empty context, in particular an empty removed list */
	memset(acxt, 0, sizeof(*acxt));

	mutex_lock(&kernfs_mutex);
}

404
/**
405
 *	kernfs_add_one - add kernfs_node to parent without warning
406
 *	@acxt: addrm context to use
407
 *	@kn: kernfs_node to be added
408
 *
409 410 411
 *	The caller must already have initialized @kn->parent.  This
 *	function increments nlink of the parent's inode if @kn is a
 *	directory and link into the children list of the parent.
412
 *
413 414 415 416 417 418 419
 *	This function should be called between calls to
 *	kernfs_addrm_start() and kernfs_addrm_finish() and should be passed
 *	the same @acxt as passed to kernfs_addrm_start().
 *
 *	LOCKING:
 *	Determined by kernfs_addrm_start().
 *
420 421 422 423
 *	RETURNS:
 *	0 on success, -EEXIST if entry with the given name already
 *	exists.
 */
424
int kernfs_add_one(struct kernfs_addrm_cxt *acxt, struct kernfs_node *kn)
425
{
426
	struct kernfs_node *parent = kn->parent;
427
	bool has_ns = kernfs_ns_enabled(parent);
428
	struct kernfs_iattrs *ps_iattr;
429 430
	int ret;

431 432 433 434 435
	if (has_ns != (bool)kn->ns) {
		WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
		     has_ns ? "required" : "invalid", parent->name, kn->name);
		return -EINVAL;
	}
436

T
Tejun Heo 已提交
437
	if (kernfs_type(parent) != KERNFS_DIR)
438
		return -EINVAL;
439

440 441 442
	if (parent->flags & KERNFS_REMOVED)
		return -ENOENT;

443
	kn->hash = kernfs_name_hash(kn->name, kn->ns);
444

445
	ret = kernfs_link_sibling(kn);
446
	if (ret)
447
		return ret;
448 449

	/* Update timestamps on the parent */
450
	ps_iattr = parent->iattr;
451 452 453 454 455 456
	if (ps_iattr) {
		struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
		ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
	}

	/* Mark the entry added into directory tree */
457 458
	kn->flags &= ~KERNFS_REMOVED;

459 460 461
	return 0;
}

462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485
/**
 *	kernfs_remove_one - remove kernfs_node from parent
 *	@acxt: addrm context to use
 *	@kn: kernfs_node to be removed
 *
 *	Mark @kn removed and drop nlink of parent inode if @kn is a
 *	directory.  @kn is unlinked from the children list.
 *
 *	This function should be called between calls to
 *	kernfs_addrm_start() and kernfs_addrm_finish() and should be
 *	passed the same @acxt as passed to kernfs_addrm_start().
 *
 *	LOCKING:
 *	Determined by kernfs_addrm_start().
 */
static void kernfs_remove_one(struct kernfs_addrm_cxt *acxt,
			      struct kernfs_node *kn)
{
	struct kernfs_iattrs *ps_iattr;

	/*
	 * Removal can be called multiple times on the same node.  Only the
	 * first invocation is effective and puts the base ref.
	 */
486
	if (kn->flags & KERNFS_REMOVED)
487 488 489 490 491 492 493 494 495 496 497 498 499
		return;

	if (kn->parent) {
		kernfs_unlink_sibling(kn);

		/* Update timestamps on the parent */
		ps_iattr = kn->parent->iattr;
		if (ps_iattr) {
			ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME;
			ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME;
		}
	}

500
	kn->flags |= KERNFS_REMOVED;
501 502 503 504
	kn->u.removed_list = acxt->removed;
	acxt->removed = kn;
}

505 506 507 508 509 510 511 512 513 514 515 516 517 518 519
/**
 *	kernfs_addrm_finish - finish up kernfs_node add/remove
 *	@acxt: addrm context to finish up
 *
 *	Release kernfs_mutex taken by kernfs_addrm_start(), then walk the
 *	removed list of @acxt: each node is deactivated, has its bin file
 *	mappings unmapped and has one reference dropped.
 *
 *	LOCKING:
 *	kernfs_mutex is released.
 */
void kernfs_addrm_finish(struct kernfs_addrm_cxt *acxt)
	__releases(kernfs_mutex)
{
	struct kernfs_node *kn;

	/* release resources acquired by kernfs_addrm_start() */
	mutex_unlock(&kernfs_mutex);

	/* kill removed kernfs_nodes */
	while ((kn = acxt->removed) != NULL) {
		/*
		 * Advance the list first: kernfs_deactivate() stores into
		 * kn->u.completion, so removed_list must be read before it
		 * (presumably ->u is a union - not visible here).
		 */
		acxt->removed = kn->u.removed_list;

		kernfs_deactivate(kn);
		kernfs_unmap_bin_file(kn);
		kernfs_put(kn);
	}
}

/**
535 536
 * kernfs_find_ns - find kernfs_node with the given name
 * @parent: kernfs_node to search under
537 538 539
 * @name: name to look for
 * @ns: the namespace tag to use
 *
540 541
 * Look for kernfs_node with name @name under @parent.  Returns pointer to
 * the found kernfs_node on success, %NULL on failure.
542
 */
543 544 545
static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent,
					  const unsigned char *name,
					  const void *ns)
546
{
547
	struct rb_node *node = parent->dir.children.rb_node;
548
	bool has_ns = kernfs_ns_enabled(parent);
549 550
	unsigned int hash;

551
	lockdep_assert_held(&kernfs_mutex);
552 553

	if (has_ns != (bool)ns) {
554
		WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
555
		     has_ns ? "required" : "invalid", parent->name, name);
556 557 558
		return NULL;
	}

559
	hash = kernfs_name_hash(name, ns);
560
	while (node) {
561
		struct kernfs_node *kn;
562 563
		int result;

564
		kn = rb_to_kn(node);
565
		result = kernfs_name_compare(hash, name, ns, kn);
566 567 568 569 570
		if (result < 0)
			node = node->rb_left;
		else if (result > 0)
			node = node->rb_right;
		else
571
			return kn;
572 573 574 575 576
	}
	return NULL;
}

/**
577 578
 * kernfs_find_and_get_ns - find and get kernfs_node with the given name
 * @parent: kernfs_node to search under
579 580 581
 * @name: name to look for
 * @ns: the namespace tag to use
 *
582
 * Look for kernfs_node with name @name under @parent and get a reference
583
 * if found.  This function may sleep and returns pointer to the found
584
 * kernfs_node on success, %NULL on failure.
585
 */
586 587
struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent,
					   const char *name, const void *ns)
588
{
589
	struct kernfs_node *kn;
590

591
	mutex_lock(&kernfs_mutex);
592 593
	kn = kernfs_find_ns(parent, name, ns);
	kernfs_get(kn);
594
	mutex_unlock(&kernfs_mutex);
595

596
	return kn;
597 598 599
}
EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns);

600 601
/**
 * kernfs_create_root - create a new kernfs hierarchy
T
Tejun Heo 已提交
602
 * @kdops: optional directory syscall operations for the hierarchy
603 604 605 606 607
 * @priv: opaque data associated with the new directory
 *
 * Returns the root of the new hierarchy on success, ERR_PTR() value on
 * failure.
 */
T
Tejun Heo 已提交
608
struct kernfs_root *kernfs_create_root(struct kernfs_dir_ops *kdops, void *priv)
609 610
{
	struct kernfs_root *root;
611
	struct kernfs_node *kn;
612 613 614 615 616

	root = kzalloc(sizeof(*root), GFP_KERNEL);
	if (!root)
		return ERR_PTR(-ENOMEM);

617 618
	ida_init(&root->ino_ida);

619 620
	kn = __kernfs_new_node(root, "", S_IFDIR | S_IRUGO | S_IXUGO,
			       KERNFS_DIR);
621
	if (!kn) {
622
		ida_destroy(&root->ino_ida);
623 624 625 626
		kfree(root);
		return ERR_PTR(-ENOMEM);
	}

627
	kn->flags &= ~KERNFS_REMOVED;
628
	kn->priv = priv;
629
	kn->dir.root = root;
630

T
Tejun Heo 已提交
631
	root->dir_ops = kdops;
632
	root->kn = kn;
633 634 635 636 637 638 639 640 641 642 643 644 645

	return root;
}

/**
 * kernfs_destroy_root - destroy a kernfs hierarchy
 * @root: root of the hierarchy to destroy
 *
 * Destroy the hierarchy anchored at @root by removing all existing
 * directories and destroying @root.
 */
void kernfs_destroy_root(struct kernfs_root *root)
{
646
	kernfs_remove(root->kn);	/* will also free @root */
647 648
}

649 650 651 652
/**
 * kernfs_create_dir_ns - create a directory
 * @parent: parent in which to create a new directory
 * @name: name of the new directory
653
 * @mode: mode of the new directory
654 655 656 657 658
 * @priv: opaque data associated with the new directory
 * @ns: optional namespace tag of the directory
 *
 * Returns the created node on success, ERR_PTR() value on failure.
 */
659
struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
660 661
					 const char *name, umode_t mode,
					 void *priv, const void *ns)
662
{
663
	struct kernfs_addrm_cxt acxt;
664
	struct kernfs_node *kn;
665 666 667
	int rc;

	/* allocate */
668
	kn = kernfs_new_node(parent, name, mode | S_IFDIR, KERNFS_DIR);
669
	if (!kn)
670 671
		return ERR_PTR(-ENOMEM);

672 673
	kn->dir.root = parent->dir.root;
	kn->ns = ns;
674
	kn->priv = priv;
675 676

	/* link in */
677
	kernfs_addrm_start(&acxt);
678
	rc = kernfs_add_one(&acxt, kn);
679
	kernfs_addrm_finish(&acxt);
680

681
	if (!rc)
682
		return kn;
683

684
	kernfs_put(kn);
685 686 687
	return ERR_PTR(rc);
}

688 689 690
static struct dentry *kernfs_iop_lookup(struct inode *dir,
					struct dentry *dentry,
					unsigned int flags)
691
{
T
Tejun Heo 已提交
692
	struct dentry *ret;
693 694
	struct kernfs_node *parent = dentry->d_parent->d_fsdata;
	struct kernfs_node *kn;
695 696 697
	struct inode *inode;
	const void *ns = NULL;

698
	mutex_lock(&kernfs_mutex);
699

700
	if (kernfs_ns_enabled(parent))
701
		ns = kernfs_info(dir->i_sb)->ns;
702

703
	kn = kernfs_find_ns(parent, dentry->d_name.name, ns);
704 705

	/* no such entry */
706
	if (!kn) {
T
Tejun Heo 已提交
707
		ret = NULL;
708 709
		goto out_unlock;
	}
710 711
	kernfs_get(kn);
	dentry->d_fsdata = kn;
712 713

	/* attach dentry and inode */
714
	inode = kernfs_get_inode(dir->i_sb, kn);
715 716 717 718 719 720 721 722
	if (!inode) {
		ret = ERR_PTR(-ENOMEM);
		goto out_unlock;
	}

	/* instantiate and hash dentry */
	ret = d_materialise_unique(dentry, inode);
 out_unlock:
723
	mutex_unlock(&kernfs_mutex);
724 725 726
	return ret;
}

T
Tejun Heo 已提交
727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762
/*
 * ->mkdir: forward to the hierarchy's mkdir directory operation, or fail
 * with -EPERM when the hierarchy does not provide one.
 */
static int kernfs_iop_mkdir(struct inode *dir, struct dentry *dentry,
			    umode_t mode)
{
	struct kernfs_node *parent = dir->i_private;
	struct kernfs_dir_ops *kdops = kernfs_root(parent)->dir_ops;

	if (kdops && kdops->mkdir)
		return kdops->mkdir(parent, dentry->d_name.name, mode);

	return -EPERM;
}

/*
 * ->rmdir: forward to the hierarchy's rmdir directory operation, or fail
 * with -EPERM when the hierarchy does not provide one.
 */
static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry)
{
	struct kernfs_node *kn = dentry->d_fsdata;
	struct kernfs_dir_ops *kdops = kernfs_root(kn)->dir_ops;

	if (kdops && kdops->rmdir)
		return kdops->rmdir(kn);

	return -EPERM;
}

/*
 * ->rename: forward to the hierarchy's rename directory operation, or
 * fail with -EPERM when the hierarchy does not provide one.
 */
static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry,
			     struct inode *new_dir, struct dentry *new_dentry)
{
	struct kernfs_node *kn = old_dentry->d_fsdata;
	struct kernfs_node *new_parent = new_dir->i_private;
	struct kernfs_dir_ops *kdops = kernfs_root(kn)->dir_ops;

	if (kdops && kdops->rename)
		return kdops->rename(kn, new_parent, new_dentry->d_name.name);

	return -EPERM;
}

763
const struct inode_operations kernfs_dir_iops = {
764 765 766 767 768 769 770 771
	.lookup		= kernfs_iop_lookup,
	.permission	= kernfs_iop_permission,
	.setattr	= kernfs_iop_setattr,
	.getattr	= kernfs_iop_getattr,
	.setxattr	= kernfs_iop_setxattr,
	.removexattr	= kernfs_iop_removexattr,
	.getxattr	= kernfs_iop_getxattr,
	.listxattr	= kernfs_iop_listxattr,
T
Tejun Heo 已提交
772 773 774 775

	.mkdir		= kernfs_iop_mkdir,
	.rmdir		= kernfs_iop_rmdir,
	.rename		= kernfs_iop_rename,
776 777
};

778
static struct kernfs_node *kernfs_leftmost_descendant(struct kernfs_node *pos)
779
{
780
	struct kernfs_node *last;
781 782 783 784 785 786

	while (true) {
		struct rb_node *rbn;

		last = pos;

T
Tejun Heo 已提交
787
		if (kernfs_type(pos) != KERNFS_DIR)
788 789
			break;

790
		rbn = rb_first(&pos->dir.children);
791 792 793
		if (!rbn)
			break;

794
		pos = rb_to_kn(rbn);
795 796 797 798 799 800
	}

	return last;
}

/**
801
 * kernfs_next_descendant_post - find the next descendant for post-order walk
802
 * @pos: the current position (%NULL to initiate traversal)
803
 * @root: kernfs_node whose descendants to walk
804 805 806 807 808
 *
 * Find the next descendant to visit for post-order traversal of @root's
 * descendants.  @root is included in the iteration and the last node to be
 * visited.
 */
809 810
static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
						       struct kernfs_node *root)
811 812 813
{
	struct rb_node *rbn;

814
	lockdep_assert_held(&kernfs_mutex);
815 816 817

	/* if first iteration, visit leftmost descendant which may be root */
	if (!pos)
818
		return kernfs_leftmost_descendant(root);
819 820 821 822 823 824

	/* if we visited @root, we're done */
	if (pos == root)
		return NULL;

	/* if there's an unvisited sibling, visit its leftmost descendant */
825
	rbn = rb_next(&pos->rb);
826
	if (rbn)
827
		return kernfs_leftmost_descendant(rb_to_kn(rbn));
828 829

	/* no sibling left, visit parent */
830
	return pos->parent;
831 832
}

833 834
static void __kernfs_remove(struct kernfs_addrm_cxt *acxt,
			    struct kernfs_node *kn)
835
{
836
	struct kernfs_node *pos, *next;
837

838 839 840
	if (!kn)
		return;

841
	pr_debug("kernfs %s: removing\n", kn->name);
842

843
	next = NULL;
844
	do {
845 846 847 848 849
		pos = next;
		next = kernfs_next_descendant_post(pos, kn);
		if (pos)
			kernfs_remove_one(acxt, pos);
	} while (next);
850 851 852
}

/**
853 854
 * kernfs_remove - remove a kernfs_node recursively
 * @kn: the kernfs_node to remove
855
 *
856
 * Remove @kn along with all its subdirectories and files.
857
 */
858
void kernfs_remove(struct kernfs_node *kn)
859
{
860 861 862 863 864
	struct kernfs_addrm_cxt acxt;

	kernfs_addrm_start(&acxt);
	__kernfs_remove(&acxt, kn);
	kernfs_addrm_finish(&acxt);
865 866 867
}

/**
868 869 870 871
 * kernfs_remove_by_name_ns - find a kernfs_node by name and remove it
 * @parent: parent of the target
 * @name: name of the kernfs_node to remove
 * @ns: namespace tag of the kernfs_node to remove
872
 *
873 874
 * Look for the kernfs_node with @name and @ns under @parent and remove it.
 * Returns 0 on success, -ENOENT if such entry doesn't exist.
875
 */
876
int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
877 878
			     const void *ns)
{
879
	struct kernfs_addrm_cxt acxt;
880
	struct kernfs_node *kn;
881

882
	if (!parent) {
883
		WARN(1, KERN_WARNING "kernfs: can not remove '%s', no directory\n",
884 885 886 887
			name);
		return -ENOENT;
	}

888
	kernfs_addrm_start(&acxt);
889

890 891
	kn = kernfs_find_ns(parent, name, ns);
	if (kn)
892
		__kernfs_remove(&acxt, kn);
893

894
	kernfs_addrm_finish(&acxt);
895

896
	if (kn)
897 898 899 900 901 902 903
		return 0;
	else
		return -ENOENT;
}

/**
 * kernfs_rename_ns - move and rename a kernfs_node
904
 * @kn: target node
905 906 907 908
 * @new_parent: new parent to put @sd under
 * @new_name: new name
 * @new_ns: new namespace tag
 */
909
int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
910 911 912 913
		     const char *new_name, const void *new_ns)
{
	int error;

914 915
	mutex_lock(&kernfs_mutex);

916
	error = -ENOENT;
917
	if ((kn->flags | new_parent->flags) & KERNFS_REMOVED)
918 919
		goto out;

920
	error = 0;
921 922
	if ((kn->parent == new_parent) && (kn->ns == new_ns) &&
	    (strcmp(kn->name, new_name) == 0))
923
		goto out;	/* nothing to rename */
924 925 926

	error = -EEXIST;
	if (kernfs_find_ns(new_parent, new_name, new_ns))
927
		goto out;
928

929
	/* rename kernfs_node */
930
	if (strcmp(kn->name, new_name) != 0) {
931 932 933
		error = -ENOMEM;
		new_name = kstrdup(new_name, GFP_KERNEL);
		if (!new_name)
934
			goto out;
935

936 937 938 939 940
		if (kn->flags & KERNFS_STATIC_NAME)
			kn->flags &= ~KERNFS_STATIC_NAME;
		else
			kfree(kn->name);

941
		kn->name = new_name;
942 943 944 945 946
	}

	/*
	 * Move to the appropriate place in the appropriate directories rbtree.
	 */
947
	kernfs_unlink_sibling(kn);
948
	kernfs_get(new_parent);
949 950
	kernfs_put(kn->parent);
	kn->ns = new_ns;
951
	kn->hash = kernfs_name_hash(kn->name, kn->ns);
952
	kn->parent = new_parent;
953
	kernfs_link_sibling(kn);
954 955

	error = 0;
956
 out:
957
	mutex_unlock(&kernfs_mutex);
958 959 960 961
	return error;
}

/* Relationship between s_mode and the DT_xxx types */
962
static inline unsigned char dt_type(struct kernfs_node *kn)
963
{
964
	return (kn->mode >> 12) & 15;
965 966
}

967
static int kernfs_dir_fop_release(struct inode *inode, struct file *filp)
968 969 970 971 972
{
	kernfs_put(filp->private_data);
	return 0;
}

973
/*
 * Find the node at which a directory read at position @hash continues.
 *
 * @pos is the node remembered from the previous call, or NULL; its
 * reference is dropped here.  It is reused only if it is not removed,
 * still belongs to @parent and still has hash @hash.  Otherwise, for
 * 1 < @hash < INT_MAX, @parent's children tree is descended by hash.
 * Finally nodes whose namespace tag differs from @ns are skipped.
 */
static struct kernfs_node *kernfs_dir_pos(const void *ns,
	struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos)
{
	struct rb_node *node;

	if (pos) {
		bool stale = (pos->flags & KERNFS_REMOVED) ||
			     pos->parent != parent || hash != pos->hash;

		kernfs_put(pos);
		if (stale)
			pos = NULL;
	}

	if (!pos && hash > 1 && hash < INT_MAX) {
		/*
		 * Without an exact match @pos is left at the last node
		 * visited on the way down.
		 */
		for (node = parent->dir.children.rb_node; node; ) {
			pos = rb_to_kn(node);

			if (hash == pos->hash)
				break;
			node = hash < pos->hash ? node->rb_left
						: node->rb_right;
		}
	}

	/* Skip over entries in the wrong namespace */
	while (pos && pos->ns != ns) {
		node = rb_next(&pos->rb);
		pos = node ? rb_to_kn(node) : NULL;
	}

	return pos;
}

1007
/*
 * Advance a directory read: re-establish the current position through
 * kernfs_dir_pos() and step to the next node in the tree whose namespace
 * tag equals @ns.  Returns NULL when there is no such node.
 */
static struct kernfs_node *kernfs_dir_next_pos(const void *ns,
	struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos)
{
	struct rb_node *node;

	pos = kernfs_dir_pos(ns, parent, ino, pos);

	while (pos) {
		node = rb_next(&pos->rb);
		if (!node)
			return NULL;

		pos = rb_to_kn(node);
		if (pos->ns == ns)
			break;
	}

	return pos;
}
}

1022
static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
1023 1024
{
	struct dentry *dentry = file->f_path.dentry;
1025 1026
	struct kernfs_node *parent = dentry->d_fsdata;
	struct kernfs_node *pos = file->private_data;
1027 1028 1029 1030
	const void *ns = NULL;

	if (!dir_emit_dots(file, ctx))
		return 0;
1031
	mutex_lock(&kernfs_mutex);
1032

1033
	if (kernfs_ns_enabled(parent))
1034
		ns = kernfs_info(dentry->d_sb)->ns;
1035

1036
	for (pos = kernfs_dir_pos(ns, parent, ctx->pos, pos);
1037
	     pos;
1038
	     pos = kernfs_dir_next_pos(ns, parent, ctx->pos, pos)) {
1039
		const char *name = pos->name;
1040 1041
		unsigned int type = dt_type(pos);
		int len = strlen(name);
1042
		ino_t ino = pos->ino;
1043

1044
		ctx->pos = pos->hash;
1045 1046 1047
		file->private_data = pos;
		kernfs_get(pos);

1048
		mutex_unlock(&kernfs_mutex);
1049 1050
		if (!dir_emit(ctx, name, len, ino, type))
			return 0;
1051
		mutex_lock(&kernfs_mutex);
1052
	}
1053
	mutex_unlock(&kernfs_mutex);
1054 1055 1056 1057 1058
	file->private_data = NULL;
	ctx->pos = INT_MAX;
	return 0;
}

1059 1060
/* ->llseek of directories: generic_file_llseek() under the inode's i_mutex. */
static loff_t kernfs_dir_fop_llseek(struct file *file, loff_t offset,
				    int whence)
{
	struct inode *inode = file_inode(file);
	loff_t new_pos;

	mutex_lock(&inode->i_mutex);
	new_pos = generic_file_llseek(file, offset, whence);
	mutex_unlock(&inode->i_mutex);

	return new_pos;
}

1072
const struct file_operations kernfs_dir_fops = {
1073
	.read		= generic_read_dir,
1074 1075 1076
	.iterate	= kernfs_fop_readdir,
	.release	= kernfs_dir_fop_release,
	.llseek		= kernfs_dir_fop_llseek,
1077
};