dir.c 23.8 KB
Newer Older
1 2 3 4 5 6 7 8 9
/*
 * fs/kernfs/dir.c - kernfs directory implementation
 *
 * Copyright (c) 2001-3 Patrick Mochel
 * Copyright (c) 2007 SUSE Linux Products GmbH
 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
 *
 * This file is released under the GPLv2.
 */
10 11 12 13 14 15 16 17 18 19 20 21

#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/idr.h>
#include <linux/slab.h>
#include <linux/security.h>
#include <linux/hash.h>

#include "kernfs-internal.h"

/* Mutex taken by the add/remove, lookup, rename and readdir paths below. */
DEFINE_MUTEX(sysfs_mutex);

/* Map an rb_node embedded as kernfs_node.s_rb back to its kernfs_node. */
#define rb_to_kn(rbn) rb_entry((rbn), struct kernfs_node, s_rb)
23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47

/**
 *	sysfs_name_hash - hash a namespace tag and a name
 *	@name: Null terminated string to hash
 *	@ns:   Namespace tag to hash
 *
 *	Returns 31 bit hash of ns + name (so it fits in an off_t).  The
 *	values 0, 1 and INT_MAX are reserved for magic directory entries
 *	and are never returned.
 */
static unsigned int sysfs_name_hash(const char *name, const void *ns)
{
	unsigned long hash = init_name_hash();
	unsigned int len = strlen(name);

	while (len--)
		hash = partial_name_hash(*name++, hash);
	hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31));
	hash &= 0x7fffffffU;
	/*
	 * Reserve hash numbers 0, 1 and INT_MAX for magic directory entries.
	 * Both 0 and 1 have to be moved out of the reserved range, so the
	 * test is "< 2"; testing "< 1" would let a hash of 1 through.
	 */
	if (hash < 2)
		hash += 2;
	if (hash >= INT_MAX)
		hash = INT_MAX - 1;
	return hash;
}

static int sysfs_name_compare(unsigned int hash, const char *name,
48
			      const void *ns, const struct kernfs_node *kn)
49
{
50 51 52 53 54
	if (hash != kn->s_hash)
		return hash - kn->s_hash;
	if (ns != kn->s_ns)
		return ns - kn->s_ns;
	return strcmp(name, kn->s_name);
55 56
}

57 58
static int sysfs_sd_compare(const struct kernfs_node *left,
			    const struct kernfs_node *right)
59 60 61 62 63 64
{
	return sysfs_name_compare(left->s_hash, left->s_name, left->s_ns,
				  right);
}

/**
65 66
 *	sysfs_link_sibling - link kernfs_node into sibling rbtree
 *	@kn: kernfs_node of interest
67
 *
68 69
 *	Link @kn into its sibling rbtree which starts from
 *	@kn->s_parent->s_dir.children.
70 71 72 73 74 75 76
 *
 *	Locking:
 *	mutex_lock(sysfs_mutex)
 *
 *	RETURNS:
 *	0 on susccess -EEXIST on failure.
 */
77
static int sysfs_link_sibling(struct kernfs_node *kn)
78
{
79
	struct rb_node **node = &kn->s_parent->s_dir.children.rb_node;
80 81
	struct rb_node *parent = NULL;

82 83
	if (sysfs_type(kn) == SYSFS_DIR)
		kn->s_parent->s_dir.subdirs++;
84 85

	while (*node) {
86
		struct kernfs_node *pos;
87 88
		int result;

89
		pos = rb_to_kn(*node);
90
		parent = *node;
91
		result = sysfs_sd_compare(kn, pos);
92 93 94 95 96 97 98 99
		if (result < 0)
			node = &pos->s_rb.rb_left;
		else if (result > 0)
			node = &pos->s_rb.rb_right;
		else
			return -EEXIST;
	}
	/* add new node and rebalance the tree */
100 101
	rb_link_node(&kn->s_rb, parent, node);
	rb_insert_color(&kn->s_rb, &kn->s_parent->s_dir.children);
102 103 104 105
	return 0;
}

/**
106 107
 *	sysfs_unlink_sibling - unlink kernfs_node from sibling rbtree
 *	@kn: kernfs_node of interest
108
 *
109 110
 *	Unlink @kn from its sibling rbtree which starts from
 *	kn->s_parent->s_dir.children.
111 112 113 114
 *
 *	Locking:
 *	mutex_lock(sysfs_mutex)
 */
115
static void sysfs_unlink_sibling(struct kernfs_node *kn)
116
{
117 118
	if (sysfs_type(kn) == SYSFS_DIR)
		kn->s_parent->s_dir.subdirs--;
119

120
	rb_erase(&kn->s_rb, &kn->s_parent->s_dir.children);
121 122 123
}

/**
124 125
 *	sysfs_get_active - get an active reference to kernfs_node
 *	@kn: kernfs_node to get an active reference to
126
 *
127
 *	Get an active reference of @kn.  This function is noop if @kn
128 129 130
 *	is NULL.
 *
 *	RETURNS:
131
 *	Pointer to @kn on success, NULL on failure.
132
 */
133
struct kernfs_node *sysfs_get_active(struct kernfs_node *kn)
134
{
135
	if (unlikely(!kn))
136 137
		return NULL;

138
	if (!atomic_inc_unless_negative(&kn->s_active))
139 140
		return NULL;

141 142 143
	if (kn->s_flags & SYSFS_FLAG_LOCKDEP)
		rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_);
	return kn;
144 145 146
}

/**
147 148
 *	sysfs_put_active - put an active reference to kernfs_node
 *	@kn: kernfs_node to put an active reference to
149
 *
150
 *	Put an active reference to @kn.  This function is noop if @kn
151 152
 *	is NULL.
 */
153
void sysfs_put_active(struct kernfs_node *kn)
154 155 156
{
	int v;

157
	if (unlikely(!kn))
158 159
		return;

160 161 162
	if (kn->s_flags & SYSFS_FLAG_LOCKDEP)
		rwsem_release(&kn->dep_map, 1, _RET_IP_);
	v = atomic_dec_return(&kn->s_active);
163 164 165
	if (likely(v != SD_DEACTIVATED_BIAS))
		return;

166 167 168
	/*
	 * atomic_dec_return() is a mb(), we'll always see the updated
	 * kn->u.completion.
169
	 */
170
	complete(kn->u.completion);
171 172 173
}

/**
174 175
 *	sysfs_deactivate - deactivate kernfs_node
 *	@kn: kernfs_node to deactivate
176 177 178
 *
 *	Deny new active references and drain existing ones.
 */
179
static void sysfs_deactivate(struct kernfs_node *kn)
180 181 182 183
{
	DECLARE_COMPLETION_ONSTACK(wait);
	int v;

184
	BUG_ON(!(kn->s_flags & SYSFS_FLAG_REMOVED));
185

186
	if (!(sysfs_type(kn) & SYSFS_ACTIVE_REF))
187 188
		return;

189
	kn->u.completion = (void *)&wait;
190

191
	rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_);
192
	/* atomic_add_return() is a mb(), put_active() will always see
193
	 * the updated kn->u.completion.
194
	 */
195
	v = atomic_add_return(SD_DEACTIVATED_BIAS, &kn->s_active);
196 197

	if (v != SD_DEACTIVATED_BIAS) {
198
		lock_contended(&kn->dep_map, _RET_IP_);
199 200 201
		wait_for_completion(&wait);
	}

202 203
	lock_acquired(&kn->dep_map, _RET_IP_);
	rwsem_release(&kn->dep_map, 1, _RET_IP_);
204 205 206
}

/**
207 208
 * kernfs_get - get a reference count on a kernfs_node
 * @kn: the target kernfs_node
209
 */
210
void kernfs_get(struct kernfs_node *kn)
211
{
212 213 214
	if (kn) {
		WARN_ON(!atomic_read(&kn->s_count));
		atomic_inc(&kn->s_count);
215 216 217 218 219
	}
}
EXPORT_SYMBOL_GPL(kernfs_get);

/**
220 221
 * kernfs_put - put a reference count on a kernfs_node
 * @kn: the target kernfs_node
222
 *
223
 * Put a reference count of @kn and destroy it if it reached zero.
224
 */
225
void kernfs_put(struct kernfs_node *kn)
226
{
227
	struct kernfs_node *parent;
228
	struct kernfs_root *root;
229

230
	if (!kn || !atomic_dec_and_test(&kn->s_count))
231
		return;
232
	root = kernfs_root(kn);
233 234
 repeat:
	/* Moving/renaming is always done while holding reference.
235
	 * kn->s_parent won't change beneath us.
236
	 */
237
	parent = kn->s_parent;
238

239
	WARN(!(kn->s_flags & SYSFS_FLAG_REMOVED),
240
		"sysfs: free using entry: %s/%s\n",
241 242 243 244 245 246 247 248 249 250 251
		parent ? parent->s_name : "", kn->s_name);

	if (sysfs_type(kn) == SYSFS_KOBJ_LINK)
		kernfs_put(kn->s_symlink.target_kn);
	if (sysfs_type(kn) & SYSFS_COPY_NAME)
		kfree(kn->s_name);
	if (kn->s_iattr) {
		if (kn->s_iattr->ia_secdata)
			security_release_secctx(kn->s_iattr->ia_secdata,
						kn->s_iattr->ia_secdata_len);
		simple_xattrs_free(&kn->s_iattr->xattrs);
252
	}
253 254 255
	kfree(kn->s_iattr);
	ida_simple_remove(&root->ino_ida, kn->s_ino);
	kmem_cache_free(sysfs_dir_cachep, kn);
256

257 258 259
	kn = parent;
	if (kn) {
		if (atomic_dec_and_test(&kn->s_count))
260 261
			goto repeat;
	} else {
262
		/* just released the root kn, free @root too */
263
		ida_destroy(&root->ino_ida);
264 265
		kfree(root);
	}
266 267 268 269 270
}
EXPORT_SYMBOL_GPL(kernfs_put);

static int sysfs_dentry_delete(const struct dentry *dentry)
{
271 272
	struct kernfs_node *kn = dentry->d_fsdata;
	return !(kn && !(kn->s_flags & SYSFS_FLAG_REMOVED));
273 274 275 276
}

static int sysfs_dentry_revalidate(struct dentry *dentry, unsigned int flags)
{
277
	struct kernfs_node *kn;
278 279 280 281

	if (flags & LOOKUP_RCU)
		return -ECHILD;

282
	kn = dentry->d_fsdata;
283 284 285
	mutex_lock(&sysfs_mutex);

	/* The sysfs dirent has been deleted */
286
	if (kn->s_flags & SYSFS_FLAG_REMOVED)
287 288 289
		goto out_bad;

	/* The sysfs dirent has been moved? */
290
	if (dentry->d_parent->d_fsdata != kn->s_parent)
291 292 293
		goto out_bad;

	/* The sysfs dirent has been renamed */
294
	if (strcmp(dentry->d_name.name, kn->s_name) != 0)
295 296 297
		goto out_bad;

	/* The sysfs dirent has been moved to a different namespace */
298 299
	if (kn->s_parent && kernfs_ns_enabled(kn->s_parent) &&
	    sysfs_info(dentry->d_sb)->ns != kn->s_ns)
300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338
		goto out_bad;

	mutex_unlock(&sysfs_mutex);
out_valid:
	return 1;
out_bad:
	/* Remove the dentry from the dcache hashes.
	 * If this is a deleted dentry we use d_drop instead of d_delete
	 * so sysfs doesn't need to cope with negative dentries.
	 *
	 * If this is a dentry that has simply been renamed we
	 * use d_drop to remove it from the dcache lookup on its
	 * old parent.  If this dentry persists later when a lookup
	 * is performed at its new name the dentry will be readded
	 * to the dcache hashes.
	 */
	mutex_unlock(&sysfs_mutex);

	/* If we have submounts we must allow the vfs caches
	 * to lie about the state of the filesystem to prevent
	 * leaks and other nasty things.
	 */
	if (check_submounts_and_drop(dentry) != 0)
		goto out_valid;

	return 0;
}

/* dentry ->d_release hook: drop the node reference held in d_fsdata. */
static void sysfs_dentry_release(struct dentry *dentry)
{
	struct kernfs_node *kn = dentry->d_fsdata;

	kernfs_put(kn);
}

/* dentry operations table wiring up the three dentry hooks defined above. */
const struct dentry_operations sysfs_dentry_ops = {
	.d_revalidate	= sysfs_dentry_revalidate,
	.d_delete	= sysfs_dentry_delete,
	.d_release	= sysfs_dentry_release,
};

339 340
/*
 * Allocate a new kernfs_node named @name with mode @mode and type @type,
 * taking its inode number from @root's ino_ida.
 *
 * When @type has SYSFS_COPY_NAME set, @name is duplicated and the node
 * keeps the copy; otherwise the node points at the caller's string.
 *
 * The node is returned with s_count set to 1, s_active set to 0 and
 * SYSFS_FLAG_REMOVED set.  Returns NULL if any allocation fails.
 */
struct kernfs_node *sysfs_new_dirent(struct kernfs_root *root,
				     const char *name, umode_t mode, int type)
{
	char *dup_name = NULL;
	struct kernfs_node *kn;
	int ino;

	if (type & SYSFS_COPY_NAME) {
		dup_name = kstrdup(name, GFP_KERNEL);
		if (!dup_name)
			return NULL;
		name = dup_name;
	}

	kn = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL);
	if (!kn)
		goto err_free_name;

	ino = ida_simple_get(&root->ino_ida, 1, 0, GFP_KERNEL);
	if (ino < 0)
		goto err_free_node;

	kn->s_ino = ino;
	kn->s_name = name;
	kn->s_mode = mode;
	kn->s_flags = type | SYSFS_FLAG_REMOVED;

	atomic_set(&kn->s_count, 1);
	atomic_set(&kn->s_active, 0);

	return kn;

 err_free_node:
	kmem_cache_free(sysfs_dir_cachep, kn);
 err_free_name:
	kfree(dup_name);
	return NULL;
}

/**
 *	sysfs_addrm_start - prepare for kernfs_node add/remove
 *	@acxt: pointer to sysfs_addrm_cxt to be used
 *
 *	Call this before adding or removing kernfs_nodes.  It zeroes
 *	@acxt, which carries context to the other addrm functions, and
 *	then acquires sysfs_mutex.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).  sysfs_mutex is locked on
 *	return.
 */
void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt)
	__acquires(sysfs_mutex)
{
	memset(acxt, 0, sizeof(*acxt));

	mutex_lock(&sysfs_mutex);
}

/**
398
 *	sysfs_add_one - add kernfs_node to parent without warning
399
 *	@acxt: addrm context to use
400 401
 *	@kn: kernfs_node to be added
 *	@parent: the parent kernfs_node to add @kn to
402
 *
403 404
 *	Get @parent and set @kn->s_parent to it and increment nlink of
 *	the parent inode if @kn is a directory and link into the children
405 406 407 408 409 410 411 412 413 414 415 416 417
 *	list of the parent.
 *
 *	This function should be called between calls to
 *	sysfs_addrm_start() and sysfs_addrm_finish() and should be
 *	passed the same @acxt as passed to sysfs_addrm_start().
 *
 *	LOCKING:
 *	Determined by sysfs_addrm_start().
 *
 *	RETURNS:
 *	0 on success, -EEXIST if entry with the given name already
 *	exists.
 */
418 419
int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct kernfs_node *kn,
		  struct kernfs_node *parent)
420
{
421
	bool has_ns = kernfs_ns_enabled(parent);
422 423 424
	struct sysfs_inode_attrs *ps_iattr;
	int ret;

425
	if (has_ns != (bool)kn->s_ns) {
426 427
		WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n",
		     has_ns ? "required" : "invalid",
428
		     parent->s_name, kn->s_name);
429 430 431
		return -EINVAL;
	}

432
	if (sysfs_type(parent) != SYSFS_DIR)
433 434
		return -EINVAL;

435 436 437
	kn->s_hash = sysfs_name_hash(kn->s_name, kn->s_ns);
	kn->s_parent = parent;
	kernfs_get(parent);
438

439
	ret = sysfs_link_sibling(kn);
440 441 442 443
	if (ret)
		return ret;

	/* Update timestamps on the parent */
444
	ps_iattr = parent->s_iattr;
445 446 447 448 449 450
	if (ps_iattr) {
		struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
		ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
	}

	/* Mark the entry added into directory tree */
451
	kn->s_flags &= ~SYSFS_FLAG_REMOVED;
452 453 454 455 456

	return 0;
}

/**
457
 *	sysfs_remove_one - remove kernfs_node from parent
458
 *	@acxt: addrm context to use
459
 *	@kn: kernfs_node to be removed
460
 *
461 462
 *	Mark @kn removed and drop nlink of parent inode if @kn is a
 *	directory.  @kn is unlinked from the children list.
463 464 465 466 467 468 469 470 471
 *
 *	This function should be called between calls to
 *	sysfs_addrm_start() and sysfs_addrm_finish() and should be
 *	passed the same @acxt as passed to sysfs_addrm_start().
 *
 *	LOCKING:
 *	Determined by sysfs_addrm_start().
 */
static void sysfs_remove_one(struct sysfs_addrm_cxt *acxt,
472
			     struct kernfs_node *kn)
473 474 475 476 477 478 479
{
	struct sysfs_inode_attrs *ps_iattr;

	/*
	 * Removal can be called multiple times on the same node.  Only the
	 * first invocation is effective and puts the base ref.
	 */
480
	if (kn->s_flags & SYSFS_FLAG_REMOVED)
481 482
		return;

483 484
	if (kn->s_parent) {
		sysfs_unlink_sibling(kn);
485

486
		/* Update timestamps on the parent */
487
		ps_iattr = kn->s_parent->s_iattr;
488 489 490 491
		if (ps_iattr) {
			ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME;
			ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME;
		}
492 493
	}

494 495 496
	kn->s_flags |= SYSFS_FLAG_REMOVED;
	kn->u.removed_list = acxt->removed;
	acxt->removed = kn;
497 498 499
}

/**
 *	sysfs_addrm_finish - finish up kernfs_node add/remove
 *	@acxt: addrm context to finish up
 *
 *	Release sysfs_mutex, which sysfs_addrm_start() acquired, then walk
 *	the list of nodes removed under @acxt: each one is deactivated,
 *	has its bin file unmapped and has one reference dropped.
 *
 *	LOCKING:
 *	sysfs_mutex is released.
 */
void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
	__releases(sysfs_mutex)
{
	struct kernfs_node *kn;

	/* release resources acquired by sysfs_addrm_start() */
	mutex_unlock(&sysfs_mutex);

	/* kill removed kernfs_nodes */
	while ((kn = acxt->removed) != NULL) {
		acxt->removed = kn->u.removed_list;

		sysfs_deactivate(kn);
		sysfs_unmap_bin_file(kn);
		kernfs_put(kn);
	}
}

/**
529 530
 * kernfs_find_ns - find kernfs_node with the given name
 * @parent: kernfs_node to search under
531 532 533
 * @name: name to look for
 * @ns: the namespace tag to use
 *
534 535
 * Look for kernfs_node with name @name under @parent.  Returns pointer to
 * the found kernfs_node on success, %NULL on failure.
536
 */
537 538 539
static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent,
					  const unsigned char *name,
					  const void *ns)
540 541
{
	struct rb_node *node = parent->s_dir.children.rb_node;
542
	bool has_ns = kernfs_ns_enabled(parent);
543 544 545 546 547 548 549 550 551 552 553 554 555
	unsigned int hash;

	lockdep_assert_held(&sysfs_mutex);

	if (has_ns != (bool)ns) {
		WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n",
		     has_ns ? "required" : "invalid",
		     parent->s_name, name);
		return NULL;
	}

	hash = sysfs_name_hash(name, ns);
	while (node) {
556
		struct kernfs_node *kn;
557 558
		int result;

559 560
		kn = rb_to_kn(node);
		result = sysfs_name_compare(hash, name, ns, kn);
561 562 563 564 565
		if (result < 0)
			node = node->rb_left;
		else if (result > 0)
			node = node->rb_right;
		else
566
			return kn;
567 568 569 570 571
	}
	return NULL;
}

/**
572 573
 * kernfs_find_and_get_ns - find and get kernfs_node with the given name
 * @parent: kernfs_node to search under
574 575 576
 * @name: name to look for
 * @ns: the namespace tag to use
 *
577
 * Look for kernfs_node with name @name under @parent and get a reference
578
 * if found.  This function may sleep and returns pointer to the found
579
 * kernfs_node on success, %NULL on failure.
580
 */
581 582
struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent,
					   const char *name, const void *ns)
583
{
584
	struct kernfs_node *kn;
585 586

	mutex_lock(&sysfs_mutex);
587 588
	kn = kernfs_find_ns(parent, name, ns);
	kernfs_get(kn);
589 590
	mutex_unlock(&sysfs_mutex);

591
	return kn;
592 593 594
}
EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns);

595 596 597 598 599 600 601 602 603 604
/**
 * kernfs_create_root - create a new kernfs hierarchy
 * @priv: opaque data associated with the new directory
 *
 * Returns the root of the new hierarchy on success, ERR_PTR() value on
 * failure.
 */
struct kernfs_root *kernfs_create_root(void *priv)
{
	struct kernfs_root *root;
605
	struct kernfs_node *kn;
606 607 608 609 610

	root = kzalloc(sizeof(*root), GFP_KERNEL);
	if (!root)
		return ERR_PTR(-ENOMEM);

611 612
	ida_init(&root->ino_ida);

613 614
	kn = sysfs_new_dirent(root, "", S_IFDIR | S_IRUGO | S_IXUGO, SYSFS_DIR);
	if (!kn) {
615
		ida_destroy(&root->ino_ida);
616 617 618 619
		kfree(root);
		return ERR_PTR(-ENOMEM);
	}

620 621 622
	kn->s_flags &= ~SYSFS_FLAG_REMOVED;
	kn->priv = priv;
	kn->s_dir.root = root;
623

624
	root->kn = kn;
625 626 627 628 629 630 631 632 633 634 635 636 637

	return root;
}

/**
 * kernfs_destroy_root - destroy a kernfs hierarchy
 * @root: root of the hierarchy to destroy
 *
 * Destroy the hierarchy anchored at @root by removing all existing
 * directories and destroying @root.
 */
void kernfs_destroy_root(struct kernfs_root *root)
{
638
	kernfs_remove(root->kn);	/* will also free @root */
639 640
}

641 642 643 644 645 646 647 648 649
/**
 * kernfs_create_dir_ns - create a directory
 * @parent: parent in which to create a new directory
 * @name: name of the new directory
 * @priv: opaque data associated with the new directory
 * @ns: optional namespace tag of the directory
 *
 * Returns the created node on success, ERR_PTR() value on failure.
 */
650 651 652
struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
					 const char *name, void *priv,
					 const void *ns)
653 654 655
{
	umode_t mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
	struct sysfs_addrm_cxt acxt;
656
	struct kernfs_node *kn;
657 658 659
	int rc;

	/* allocate */
660 661
	kn = sysfs_new_dirent(kernfs_root(parent), name, mode, SYSFS_DIR);
	if (!kn)
662 663
		return ERR_PTR(-ENOMEM);

664 665 666
	kn->s_dir.root = parent->s_dir.root;
	kn->s_ns = ns;
	kn->priv = priv;
667 668 669

	/* link in */
	sysfs_addrm_start(&acxt);
670
	rc = sysfs_add_one(&acxt, kn, parent);
671 672 673
	sysfs_addrm_finish(&acxt);

	if (!rc)
674
		return kn;
675

676
	kernfs_put(kn);
677 678 679 680 681 682 683
	return ERR_PTR(rc);
}

static struct dentry *sysfs_lookup(struct inode *dir, struct dentry *dentry,
				   unsigned int flags)
{
	struct dentry *ret = NULL;
684 685
	struct kernfs_node *parent = dentry->d_parent->d_fsdata;
	struct kernfs_node *kn;
686 687 688 689 690
	struct inode *inode;
	const void *ns = NULL;

	mutex_lock(&sysfs_mutex);

691
	if (kernfs_ns_enabled(parent))
692 693
		ns = sysfs_info(dir->i_sb)->ns;

694
	kn = kernfs_find_ns(parent, dentry->d_name.name, ns);
695 696

	/* no such entry */
697
	if (!kn) {
698 699 700
		ret = ERR_PTR(-ENOENT);
		goto out_unlock;
	}
701 702
	kernfs_get(kn);
	dentry->d_fsdata = kn;
703 704

	/* attach dentry and inode */
705
	inode = sysfs_get_inode(dir->i_sb, kn);
706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723
	if (!inode) {
		ret = ERR_PTR(-ENOMEM);
		goto out_unlock;
	}

	/* instantiate and hash dentry */
	ret = d_materialise_unique(dentry, inode);
 out_unlock:
	mutex_unlock(&sysfs_mutex);
	return ret;
}

const struct inode_operations sysfs_dir_inode_operations = {
	.lookup		= sysfs_lookup,
	.permission	= sysfs_permission,
	.setattr	= sysfs_setattr,
	.getattr	= sysfs_getattr,
	.setxattr	= sysfs_setxattr,
724 725 726
	.removexattr	= sysfs_removexattr,
	.getxattr	= sysfs_getxattr,
	.listxattr	= sysfs_listxattr,
727 728
};

729
static struct kernfs_node *sysfs_leftmost_descendant(struct kernfs_node *pos)
730
{
731
	struct kernfs_node *last;
732 733 734 735 736 737 738 739 740 741 742 743 744

	while (true) {
		struct rb_node *rbn;

		last = pos;

		if (sysfs_type(pos) != SYSFS_DIR)
			break;

		rbn = rb_first(&pos->s_dir.children);
		if (!rbn)
			break;

745
		pos = rb_to_kn(rbn);
746 747 748 749 750 751 752 753
	}

	return last;
}

/**
 * sysfs_next_descendant_post - find the next descendant for post-order walk
 * @pos: the current position (%NULL to initiate traversal)
754
 * @root: kernfs_node whose descendants to walk
755 756 757 758 759
 *
 * Find the next descendant to visit for post-order traversal of @root's
 * descendants.  @root is included in the iteration and the last node to be
 * visited.
 */
760 761
static struct kernfs_node *sysfs_next_descendant_post(struct kernfs_node *pos,
						      struct kernfs_node *root)
762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777
{
	struct rb_node *rbn;

	lockdep_assert_held(&sysfs_mutex);

	/* if first iteration, visit leftmost descendant which may be root */
	if (!pos)
		return sysfs_leftmost_descendant(root);

	/* if we visited @root, we're done */
	if (pos == root)
		return NULL;

	/* if there's an unvisited sibling, visit its leftmost descendant */
	rbn = rb_next(&pos->s_rb);
	if (rbn)
778
		return sysfs_leftmost_descendant(rb_to_kn(rbn));
779 780 781 782 783 784

	/* no sibling left, visit parent */
	return pos->s_parent;
}

static void __kernfs_remove(struct sysfs_addrm_cxt *acxt,
785
			    struct kernfs_node *kn)
786
{
787
	struct kernfs_node *pos, *next;
788

789
	if (!kn)
790 791
		return;

792
	pr_debug("sysfs %s: removing\n", kn->s_name);
793 794 795 796

	next = NULL;
	do {
		pos = next;
797
		next = sysfs_next_descendant_post(pos, kn);
798 799 800 801 802 803
		if (pos)
			sysfs_remove_one(acxt, pos);
	} while (next);
}

/**
804 805
 * kernfs_remove - remove a kernfs_node recursively
 * @kn: the kernfs_node to remove
806
 *
807
 * Remove @kn along with all its subdirectories and files.
808
 */
809
void kernfs_remove(struct kernfs_node *kn)
810 811 812 813
{
	struct sysfs_addrm_cxt acxt;

	sysfs_addrm_start(&acxt);
814
	__kernfs_remove(&acxt, kn);
815 816 817 818
	sysfs_addrm_finish(&acxt);
}

/**
819 820 821 822
 * kernfs_remove_by_name_ns - find a kernfs_node by name and remove it
 * @parent: parent of the target
 * @name: name of the kernfs_node to remove
 * @ns: namespace tag of the kernfs_node to remove
823
 *
824 825
 * Look for the kernfs_node with @name and @ns under @parent and remove it.
 * Returns 0 on success, -ENOENT if such entry doesn't exist.
826
 */
827
int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
828 829 830
			     const void *ns)
{
	struct sysfs_addrm_cxt acxt;
831
	struct kernfs_node *kn;
832

833
	if (!parent) {
834 835 836 837 838 839 840
		WARN(1, KERN_WARNING "sysfs: can not remove '%s', no directory\n",
			name);
		return -ENOENT;
	}

	sysfs_addrm_start(&acxt);

841 842 843
	kn = kernfs_find_ns(parent, name, ns);
	if (kn)
		__kernfs_remove(&acxt, kn);
844 845 846

	sysfs_addrm_finish(&acxt);

847
	if (kn)
848 849 850 851 852 853 854
		return 0;
	else
		return -ENOENT;
}

/**
 * kernfs_rename_ns - move and rename a kernfs_node
855
 * @kn: target node
856 857 858 859
 * @new_parent: new parent to put @sd under
 * @new_name: new name
 * @new_ns: new namespace tag
 */
860
int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
861 862 863 864 865 866 867
		     const char *new_name, const void *new_ns)
{
	int error;

	mutex_lock(&sysfs_mutex);

	error = 0;
868 869
	if ((kn->s_parent == new_parent) && (kn->s_ns == new_ns) &&
	    (strcmp(kn->s_name, new_name) == 0))
870 871 872 873 874 875
		goto out;	/* nothing to rename */

	error = -EEXIST;
	if (kernfs_find_ns(new_parent, new_name, new_ns))
		goto out;

876 877
	/* rename kernfs_node */
	if (strcmp(kn->s_name, new_name) != 0) {
878 879 880 881 882
		error = -ENOMEM;
		new_name = kstrdup(new_name, GFP_KERNEL);
		if (!new_name)
			goto out;

883 884
		kfree(kn->s_name);
		kn->s_name = new_name;
885 886 887 888 889
	}

	/*
	 * Move to the appropriate place in the appropriate directories rbtree.
	 */
890
	sysfs_unlink_sibling(kn);
891
	kernfs_get(new_parent);
892 893 894 895 896
	kernfs_put(kn->s_parent);
	kn->s_ns = new_ns;
	kn->s_hash = sysfs_name_hash(kn->s_name, kn->s_ns);
	kn->s_parent = new_parent;
	sysfs_link_sibling(kn);
897 898 899 900 901 902 903 904

	error = 0;
 out:
	mutex_unlock(&sysfs_mutex);
	return error;
}

/* Relationship between s_mode and the DT_xxx types */
905
static inline unsigned char dt_type(struct kernfs_node *kn)
906
{
907
	return (kn->s_mode >> 12) & 15;
908 909 910 911 912 913 914 915
}

/*
 * file ->release hook for directories: drop the node reference that
 * sysfs_readdir() left in filp->private_data (may be NULL).
 */
static int sysfs_dir_release(struct inode *inode, struct file *filp)
{
	struct kernfs_node *cursor = filp->private_data;

	kernfs_put(cursor);
	return 0;
}

916 917
/*
 * Work out which child of @parent a directory read at offset @hash
 * should resume from.
 *
 * @pos is the node cached by the previous read, if any; its reference
 * is dropped here.  It is reused only if it is still linked under
 * @parent with a matching hash.  Otherwise, for a non-reserved @hash,
 * the children rbtree is walked by hash; the walk stops on an exact
 * match or leaves @pos at the last node visited.  Entries whose
 * namespace tag differs from @ns are then skipped.
 */
static struct kernfs_node *sysfs_dir_pos(const void *ns,
	struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos)
{
	if (pos) {
		int still_valid = !(pos->s_flags & SYSFS_FLAG_REMOVED) &&
			pos->s_parent == parent &&
			hash == pos->s_hash;

		kernfs_put(pos);
		if (!still_valid)
			pos = NULL;
	}

	if (!pos && (hash > 1) && (hash < INT_MAX)) {
		struct rb_node *node;

		for (node = parent->s_dir.children.rb_node; node; ) {
			pos = rb_to_kn(node);

			if (hash == pos->s_hash)
				break;

			node = (hash < pos->s_hash) ? node->rb_left
						    : node->rb_right;
		}
	}

	/* Skip over entries in the wrong namespace */
	while (pos && pos->s_ns != ns) {
		struct rb_node *following = rb_next(&pos->s_rb);

		pos = following ? rb_to_kn(following) : NULL;
	}

	return pos;
}

951 952
/*
 * Re-validate @pos through sysfs_dir_pos() and then advance to the next
 * sibling whose namespace tag equals @ns.  Returns NULL when there is
 * no such sibling.
 */
static struct kernfs_node *sysfs_dir_next_pos(const void *ns,
	struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos)
{
	pos = sysfs_dir_pos(ns, parent, ino, pos);
	if (!pos)
		return NULL;

	do {
		struct rb_node *following = rb_next(&pos->s_rb);

		pos = following ? rb_to_kn(following) : NULL;
	} while (pos && pos->s_ns != ns);

	return pos;
}

static int sysfs_readdir(struct file *file, struct dir_context *ctx)
{
	struct dentry *dentry = file->f_path.dentry;
969 970
	struct kernfs_node *parent = dentry->d_fsdata;
	struct kernfs_node *pos = file->private_data;
971 972 973 974 975 976
	const void *ns = NULL;

	if (!dir_emit_dots(file, ctx))
		return 0;
	mutex_lock(&sysfs_mutex);

977
	if (kernfs_ns_enabled(parent))
978 979
		ns = sysfs_info(dentry->d_sb)->ns;

980
	for (pos = sysfs_dir_pos(ns, parent, ctx->pos, pos);
981
	     pos;
982
	     pos = sysfs_dir_next_pos(ns, parent, ctx->pos, pos)) {
983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020
		const char *name = pos->s_name;
		unsigned int type = dt_type(pos);
		int len = strlen(name);
		ino_t ino = pos->s_ino;

		ctx->pos = pos->s_hash;
		file->private_data = pos;
		kernfs_get(pos);

		mutex_unlock(&sysfs_mutex);
		if (!dir_emit(ctx, name, len, ino, type))
			return 0;
		mutex_lock(&sysfs_mutex);
	}
	mutex_unlock(&sysfs_mutex);
	file->private_data = NULL;
	ctx->pos = INT_MAX;
	return 0;
}

/*
 * file ->llseek hook for directories: generic_file_llseek() run under
 * the inode's i_mutex.
 */
static loff_t sysfs_dir_llseek(struct file *file, loff_t offset, int whence)
{
	struct mutex *lock = &file_inode(file)->i_mutex;
	loff_t new_pos;

	mutex_lock(lock);
	new_pos = generic_file_llseek(file, offset, whence);
	mutex_unlock(lock);

	return new_pos;
}

/*
 * file operations for kernfs directories: reads go to generic_read_dir,
 * iteration/release/llseek use the handlers defined above.
 */
const struct file_operations sysfs_dir_operations = {
	.read		= generic_read_dir,
	.iterate	= sysfs_readdir,
	.release	= sysfs_dir_release,
	.llseek		= sysfs_dir_llseek,
};