dir.c 25.0 KB
Newer Older
1 2 3 4 5 6 7 8 9
/*
 * fs/kernfs/dir.c - kernfs directory implementation
 *
 * Copyright (c) 2001-3 Patrick Mochel
 * Copyright (c) 2007 SUSE Linux Products GmbH
 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
 *
 * This file is released under the GPLv2.
 */
10

11
#include <linux/sched.h>
12 13 14 15 16 17 18 19 20
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/idr.h>
#include <linux/slab.h>
#include <linux/security.h>
#include <linux/hash.h>

#include "kernfs-internal.h"

21
/* Mutex taken around the sibling-rbtree operations in this file. */
DEFINE_MUTEX(kernfs_mutex);

/* Map an embedded rb_node pointer back to its containing kernfs_node. */
#define rb_to_kn(node) rb_entry((node), struct kernfs_node, rb)
24

25 26 27 28 29 30 31 32 33
/*
 * Report whether lockdep annotations apply to @kn.  Without
 * CONFIG_DEBUG_LOCK_ALLOC this is always false; otherwise it reflects the
 * KERNFS_LOCKDEP bit in @kn->flags.
 */
static bool kernfs_lockdep(struct kernfs_node *kn)
{
#ifndef CONFIG_DEBUG_LOCK_ALLOC
	return false;
#else
	return kn->flags & KERNFS_LOCKDEP;
#endif
}

34
/**
35
 *	kernfs_name_hash
36 37 38 39 40
 *	@name: Null terminated string to hash
 *	@ns:   Namespace tag to hash
 *
 *	Returns 31 bit hash of ns + name (so it fits in an off_t )
 */
41
static unsigned int kernfs_name_hash(const char *name, const void *ns)
42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
{
	unsigned long hash = init_name_hash();
	unsigned int len = strlen(name);
	while (len--)
		hash = partial_name_hash(*name++, hash);
	hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31));
	hash &= 0x7fffffffU;
	/* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */
	if (hash < 1)
		hash += 2;
	if (hash >= INT_MAX)
		hash = INT_MAX - 1;
	return hash;
}

57 58
static int kernfs_name_compare(unsigned int hash, const char *name,
			       const void *ns, const struct kernfs_node *kn)
59
{
60 61 62 63 64
	if (hash != kn->hash)
		return hash - kn->hash;
	if (ns != kn->ns)
		return ns - kn->ns;
	return strcmp(name, kn->name);
65 66
}

67 68
static int kernfs_sd_compare(const struct kernfs_node *left,
			     const struct kernfs_node *right)
69
{
70
	return kernfs_name_compare(left->hash, left->name, left->ns, right);
71 72 73
}

/**
74
 *	kernfs_link_sibling - link kernfs_node into sibling rbtree
75
 *	@kn: kernfs_node of interest
76
 *
77
 *	Link @kn into its sibling rbtree which starts from
78
 *	@kn->parent->dir.children.
79 80
 *
 *	Locking:
81
 *	mutex_lock(kernfs_mutex)
82 83 84 85
 *
 *	RETURNS:
 *	0 on susccess -EEXIST on failure.
 */
86
static int kernfs_link_sibling(struct kernfs_node *kn)
87
{
88
	struct rb_node **node = &kn->parent->dir.children.rb_node;
89 90
	struct rb_node *parent = NULL;

T
Tejun Heo 已提交
91
	if (kernfs_type(kn) == KERNFS_DIR)
92
		kn->parent->dir.subdirs++;
93 94

	while (*node) {
95
		struct kernfs_node *pos;
96 97
		int result;

98
		pos = rb_to_kn(*node);
99
		parent = *node;
100
		result = kernfs_sd_compare(kn, pos);
101
		if (result < 0)
102
			node = &pos->rb.rb_left;
103
		else if (result > 0)
104
			node = &pos->rb.rb_right;
105 106 107 108
		else
			return -EEXIST;
	}
	/* add new node and rebalance the tree */
109 110
	rb_link_node(&kn->rb, parent, node);
	rb_insert_color(&kn->rb, &kn->parent->dir.children);
111 112 113 114
	return 0;
}

/**
115
 *	kernfs_unlink_sibling - unlink kernfs_node from sibling rbtree
116
 *	@kn: kernfs_node of interest
117
 *
118
 *	Unlink @kn from its sibling rbtree which starts from
119
 *	kn->parent->dir.children.
120 121
 *
 *	Locking:
122
 *	mutex_lock(kernfs_mutex)
123
 */
124
static void kernfs_unlink_sibling(struct kernfs_node *kn)
125
{
T
Tejun Heo 已提交
126
	if (kernfs_type(kn) == KERNFS_DIR)
127
		kn->parent->dir.subdirs--;
128

129
	rb_erase(&kn->rb, &kn->parent->dir.children);
T
Tejun Heo 已提交
130
	RB_CLEAR_NODE(&kn->rb);
131 132 133
}

/**
134
 *	kernfs_get_active - get an active reference to kernfs_node
135
 *	@kn: kernfs_node to get an active reference to
136
 *
137
 *	Get an active reference of @kn.  This function is noop if @kn
138 139 140
 *	is NULL.
 *
 *	RETURNS:
141
 *	Pointer to @kn on success, NULL on failure.
142
 */
143
struct kernfs_node *kernfs_get_active(struct kernfs_node *kn)
144
{
145
	if (unlikely(!kn))
146 147
		return NULL;

148
	if (!atomic_inc_unless_negative(&kn->active))
149 150
		return NULL;

151
	if (kernfs_lockdep(kn))
152 153
		rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_);
	return kn;
154 155 156
}

/**
157
 *	kernfs_put_active - put an active reference to kernfs_node
158
 *	@kn: kernfs_node to put an active reference to
159
 *
160
 *	Put an active reference to @kn.  This function is noop if @kn
161 162
 *	is NULL.
 */
163
void kernfs_put_active(struct kernfs_node *kn)
164
{
165
	struct kernfs_root *root = kernfs_root(kn);
166 167
	int v;

168
	if (unlikely(!kn))
169 170
		return;

171
	if (kernfs_lockdep(kn))
172
		rwsem_release(&kn->dep_map, 1, _RET_IP_);
173
	v = atomic_dec_return(&kn->active);
T
Tejun Heo 已提交
174
	if (likely(v != KN_DEACTIVATED_BIAS))
175 176
		return;

177
	wake_up_all(&root->deactivate_waitq);
178 179 180
}

/**
T
Tejun Heo 已提交
181 182
 * kernfs_drain - drain kernfs_node
 * @kn: kernfs_node to drain
183
 *
T
Tejun Heo 已提交
184
 * Drain existing usages.
185
 */
T
Tejun Heo 已提交
186
static void kernfs_drain(struct kernfs_node *kn)
187
{
188
	struct kernfs_root *root = kernfs_root(kn);
189

T
Tejun Heo 已提交
190
	WARN_ON_ONCE(atomic_read(&kn->active) >= 0);
191

192 193 194 195 196
	if (kernfs_lockdep(kn)) {
		rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_);
		if (atomic_read(&kn->active) != KN_DEACTIVATED_BIAS)
			lock_contended(&kn->dep_map, _RET_IP_);
	}
197 198 199

	wait_event(root->deactivate_waitq,
		   atomic_read(&kn->active) == KN_DEACTIVATED_BIAS);
200

201 202 203 204
	if (kernfs_lockdep(kn)) {
		lock_acquired(&kn->dep_map, _RET_IP_);
		rwsem_release(&kn->dep_map, 1, _RET_IP_);
	}
205 206 207
}

/**
208 209
 * kernfs_get - get a reference count on a kernfs_node
 * @kn: the target kernfs_node
210
 */
211
void kernfs_get(struct kernfs_node *kn)
212
{
213
	if (kn) {
214 215
		WARN_ON(!atomic_read(&kn->count));
		atomic_inc(&kn->count);
216 217 218 219 220
	}
}
EXPORT_SYMBOL_GPL(kernfs_get);

/**
221 222
 * kernfs_put - put a reference count on a kernfs_node
 * @kn: the target kernfs_node
223
 *
224
 * Put a reference count of @kn and destroy it if it reached zero.
225
 */
226
void kernfs_put(struct kernfs_node *kn)
227
{
228
	struct kernfs_node *parent;
229
	struct kernfs_root *root;
230

231
	if (!kn || !atomic_dec_and_test(&kn->count))
232
		return;
233
	root = kernfs_root(kn);
234
 repeat:
T
Tejun Heo 已提交
235 236
	/*
	 * Moving/renaming is always done while holding reference.
237
	 * kn->parent won't change beneath us.
238
	 */
239
	parent = kn->parent;
240

T
Tejun Heo 已提交
241 242 243
	WARN_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS,
		  "kernfs_put: %s/%s: released with incorrect active_ref %d\n",
		  parent ? parent->name : "", kn->name, atomic_read(&kn->active));
244

T
Tejun Heo 已提交
245
	if (kernfs_type(kn) == KERNFS_LINK)
246
		kernfs_put(kn->symlink.target_kn);
247
	if (!(kn->flags & KERNFS_STATIC_NAME))
248 249 250 251 252 253
		kfree(kn->name);
	if (kn->iattr) {
		if (kn->iattr->ia_secdata)
			security_release_secctx(kn->iattr->ia_secdata,
						kn->iattr->ia_secdata_len);
		simple_xattrs_free(&kn->iattr->xattrs);
254
	}
255 256
	kfree(kn->iattr);
	ida_simple_remove(&root->ino_ida, kn->ino);
257
	kmem_cache_free(kernfs_node_cache, kn);
258

259 260
	kn = parent;
	if (kn) {
261
		if (atomic_dec_and_test(&kn->count))
262 263
			goto repeat;
	} else {
264
		/* just released the root kn, free @root too */
265
		ida_destroy(&root->ino_ida);
266 267
		kfree(root);
	}
268 269 270
}
EXPORT_SYMBOL_GPL(kernfs_put);

271
static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
272
{
273
	struct kernfs_node *kn;
274 275 276 277

	if (flags & LOOKUP_RCU)
		return -ECHILD;

T
Tejun Heo 已提交
278 279 280 281
	/* Always perform fresh lookup for negatives */
	if (!dentry->d_inode)
		goto out_bad_unlocked;

282
	kn = dentry->d_fsdata;
283
	mutex_lock(&kernfs_mutex);
284

T
Tejun Heo 已提交
285 286
	/* Force fresh lookup if removed */
	if (kn->parent && RB_EMPTY_NODE(&kn->rb))
287 288
		goto out_bad;

289
	/* The kernfs node has been moved? */
290
	if (dentry->d_parent->d_fsdata != kn->parent)
291 292
		goto out_bad;

293
	/* The kernfs node has been renamed */
294
	if (strcmp(dentry->d_name.name, kn->name) != 0)
295 296
		goto out_bad;

297
	/* The kernfs node has been moved to a different namespace */
298
	if (kn->parent && kernfs_ns_enabled(kn->parent) &&
299
	    kernfs_info(dentry->d_sb)->ns != kn->ns)
300 301
		goto out_bad;

302
	mutex_unlock(&kernfs_mutex);
303 304 305
out_valid:
	return 1;
out_bad:
306
	mutex_unlock(&kernfs_mutex);
T
Tejun Heo 已提交
307 308 309 310 311 312 313
out_bad_unlocked:
	/*
	 * @dentry doesn't match the underlying kernfs node, drop the
	 * dentry and force lookup.  If we have submounts we must allow the
	 * vfs caches to lie about the state of the filesystem to prevent
	 * leaks and other nasty things, so use check_submounts_and_drop()
	 * instead of d_drop().
314 315 316 317 318 319 320
	 */
	if (check_submounts_and_drop(dentry) != 0)
		goto out_valid;

	return 0;
}

321
static void kernfs_dop_release(struct dentry *dentry)
322 323 324 325
{
	kernfs_put(dentry->d_fsdata);
}

326
const struct dentry_operations kernfs_dops = {
327 328
	.d_revalidate	= kernfs_dop_revalidate,
	.d_release	= kernfs_dop_release,
329 330
};

331
struct kernfs_node *kernfs_new_node(struct kernfs_root *root, const char *name,
332
				    umode_t mode, unsigned flags)
333 334
{
	char *dup_name = NULL;
335
	struct kernfs_node *kn;
336
	int ret;
337

338
	if (!(flags & KERNFS_STATIC_NAME)) {
339 340 341 342 343
		name = dup_name = kstrdup(name, GFP_KERNEL);
		if (!name)
			return NULL;
	}

344
	kn = kmem_cache_zalloc(kernfs_node_cache, GFP_KERNEL);
345
	if (!kn)
346 347
		goto err_out1;

348 349
	ret = ida_simple_get(&root->ino_ida, 1, 0, GFP_KERNEL);
	if (ret < 0)
350
		goto err_out2;
351
	kn->ino = ret;
352

353
	atomic_set(&kn->count, 1);
T
Tejun Heo 已提交
354 355
	atomic_set(&kn->active, KN_DEACTIVATED_BIAS);
	RB_CLEAR_NODE(&kn->rb);
356

357 358
	kn->name = name;
	kn->mode = mode;
T
Tejun Heo 已提交
359
	kn->flags = flags;
360

361
	return kn;
362 363

 err_out2:
364
	kmem_cache_free(kernfs_node_cache, kn);
365 366 367 368 369 370
 err_out1:
	kfree(dup_name);
	return NULL;
}

/**
371
 *	kernfs_addrm_start - prepare for kernfs_node add/remove
372
 *	@acxt: pointer to kernfs_addrm_cxt to be used
373 374
 *
 *	This function is called when the caller is about to add or remove
375 376
 *	kernfs_node.  This function acquires kernfs_mutex.  @acxt is used
 *	to keep and pass context to other addrm functions.
377 378
 *
 *	LOCKING:
379
 *	Kernel thread context (may sleep).  kernfs_mutex is locked on
380 381
 *	return.
 */
382
void kernfs_addrm_start(struct kernfs_addrm_cxt *acxt)
383
	__acquires(kernfs_mutex)
384 385 386
{
	memset(acxt, 0, sizeof(*acxt));

387
	mutex_lock(&kernfs_mutex);
388 389 390
}

/**
391
 *	kernfs_add_one - add kernfs_node to parent without warning
392
 *	@acxt: addrm context to use
393 394
 *	@kn: kernfs_node to be added
 *	@parent: the parent kernfs_node to add @kn to
395
 *
396 397 398
 *	Get @parent and set @kn->parent to it and increment nlink of the
 *	parent inode if @kn is a directory and link into the children list
 *	of the parent.
399 400
 *
 *	This function should be called between calls to
401 402
 *	kernfs_addrm_start() and kernfs_addrm_finish() and should be passed
 *	the same @acxt as passed to kernfs_addrm_start().
403 404
 *
 *	LOCKING:
405
 *	Determined by kernfs_addrm_start().
406 407 408 409 410
 *
 *	RETURNS:
 *	0 on success, -EEXIST if entry with the given name already
 *	exists.
 */
411
int kernfs_add_one(struct kernfs_addrm_cxt *acxt, struct kernfs_node *kn,
412
		  struct kernfs_node *parent)
413
{
414
	bool has_ns = kernfs_ns_enabled(parent);
415
	struct kernfs_iattrs *ps_iattr;
416 417
	int ret;

T
Tejun Heo 已提交
418 419
	WARN_ON_ONCE(atomic_read(&parent->active) < 0);

420
	if (has_ns != (bool)kn->ns) {
421
		WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
422
		     has_ns ? "required" : "invalid", parent->name, kn->name);
423 424 425
		return -EINVAL;
	}

T
Tejun Heo 已提交
426
	if (kernfs_type(parent) != KERNFS_DIR)
427 428
		return -EINVAL;

429
	kn->hash = kernfs_name_hash(kn->name, kn->ns);
430
	kn->parent = parent;
431
	kernfs_get(parent);
432

433
	ret = kernfs_link_sibling(kn);
434 435 436 437
	if (ret)
		return ret;

	/* Update timestamps on the parent */
438
	ps_iattr = parent->iattr;
439 440 441 442 443 444
	if (ps_iattr) {
		struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
		ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
	}

	/* Mark the entry added into directory tree */
T
Tejun Heo 已提交
445
	atomic_sub(KN_DEACTIVATED_BIAS, &kn->active);
446 447 448 449
	return 0;
}

/**
450
 *	kernfs_remove_one - remove kernfs_node from parent
451
 *	@acxt: addrm context to use
452
 *	@kn: kernfs_node to be removed
453
 *
454 455
 *	Mark @kn removed and drop nlink of parent inode if @kn is a
 *	directory.  @kn is unlinked from the children list.
456 457
 *
 *	This function should be called between calls to
458 459
 *	kernfs_addrm_start() and kernfs_addrm_finish() and should be
 *	passed the same @acxt as passed to kernfs_addrm_start().
460 461
 *
 *	LOCKING:
462
 *	Determined by kernfs_addrm_start().
463
 */
464 465
static void kernfs_remove_one(struct kernfs_addrm_cxt *acxt,
			      struct kernfs_node *kn)
466
{
467
	struct kernfs_iattrs *ps_iattr;
468 469 470 471 472

	/*
	 * Removal can be called multiple times on the same node.  Only the
	 * first invocation is effective and puts the base ref.
	 */
T
Tejun Heo 已提交
473
	if (atomic_read(&kn->active) < 0)
474 475
		return;

476
	if (kn->parent) {
477
		kernfs_unlink_sibling(kn);
478

479
		/* Update timestamps on the parent */
480
		ps_iattr = kn->parent->iattr;
481 482 483 484
		if (ps_iattr) {
			ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME;
			ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME;
		}
485 486
	}

T
Tejun Heo 已提交
487
	atomic_add(KN_DEACTIVATED_BIAS, &kn->active);
488 489
	kn->u.removed_list = acxt->removed;
	acxt->removed = kn;
490 491 492
}

/**
493
 *	kernfs_addrm_finish - finish up kernfs_node add/remove
494 495
 *	@acxt: addrm context to finish up
 *
496
 *	Finish up kernfs_node add/remove.  Resources acquired by
497
 *	kernfs_addrm_start() are released and removed kernfs_nodes are
498 499 500
 *	cleaned up.
 *
 *	LOCKING:
501
 *	kernfs_mutex is released.
502
 */
503
void kernfs_addrm_finish(struct kernfs_addrm_cxt *acxt)
504
	__releases(kernfs_mutex)
505
{
506
	/* release resources acquired by kernfs_addrm_start() */
507
	mutex_unlock(&kernfs_mutex);
508

509
	/* kill removed kernfs_nodes */
510
	while (acxt->removed) {
511
		struct kernfs_node *kn = acxt->removed;
512

513
		acxt->removed = kn->u.removed_list;
514

T
Tejun Heo 已提交
515
		kernfs_drain(kn);
516
		kernfs_unmap_bin_file(kn);
517
		kernfs_put(kn);
518 519 520 521
	}
}

/**
522 523
 * kernfs_find_ns - find kernfs_node with the given name
 * @parent: kernfs_node to search under
524 525 526
 * @name: name to look for
 * @ns: the namespace tag to use
 *
527 528
 * Look for kernfs_node with name @name under @parent.  Returns pointer to
 * the found kernfs_node on success, %NULL on failure.
529
 */
530 531 532
static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent,
					  const unsigned char *name,
					  const void *ns)
533
{
534
	struct rb_node *node = parent->dir.children.rb_node;
535
	bool has_ns = kernfs_ns_enabled(parent);
536 537
	unsigned int hash;

538
	lockdep_assert_held(&kernfs_mutex);
539 540

	if (has_ns != (bool)ns) {
541
		WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
542
		     has_ns ? "required" : "invalid", parent->name, name);
543 544 545
		return NULL;
	}

546
	hash = kernfs_name_hash(name, ns);
547
	while (node) {
548
		struct kernfs_node *kn;
549 550
		int result;

551
		kn = rb_to_kn(node);
552
		result = kernfs_name_compare(hash, name, ns, kn);
553 554 555 556 557
		if (result < 0)
			node = node->rb_left;
		else if (result > 0)
			node = node->rb_right;
		else
558
			return kn;
559 560 561 562 563
	}
	return NULL;
}

/**
564 565
 * kernfs_find_and_get_ns - find and get kernfs_node with the given name
 * @parent: kernfs_node to search under
566 567 568
 * @name: name to look for
 * @ns: the namespace tag to use
 *
569
 * Look for kernfs_node with name @name under @parent and get a reference
570
 * if found.  This function may sleep and returns pointer to the found
571
 * kernfs_node on success, %NULL on failure.
572
 */
573 574
struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent,
					   const char *name, const void *ns)
575
{
576
	struct kernfs_node *kn;
577

578
	mutex_lock(&kernfs_mutex);
579 580
	kn = kernfs_find_ns(parent, name, ns);
	kernfs_get(kn);
581
	mutex_unlock(&kernfs_mutex);
582

583
	return kn;
584 585 586
}
EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns);

587 588
/**
 * kernfs_create_root - create a new kernfs hierarchy
T
Tejun Heo 已提交
589
 * @kdops: optional directory syscall operations for the hierarchy
590 591 592 593 594
 * @priv: opaque data associated with the new directory
 *
 * Returns the root of the new hierarchy on success, ERR_PTR() value on
 * failure.
 */
T
Tejun Heo 已提交
595
struct kernfs_root *kernfs_create_root(struct kernfs_dir_ops *kdops, void *priv)
596 597
{
	struct kernfs_root *root;
598
	struct kernfs_node *kn;
599 600 601 602 603

	root = kzalloc(sizeof(*root), GFP_KERNEL);
	if (!root)
		return ERR_PTR(-ENOMEM);

604 605
	ida_init(&root->ino_ida);

606
	kn = kernfs_new_node(root, "", S_IFDIR | S_IRUGO | S_IXUGO, KERNFS_DIR);
607
	if (!kn) {
608
		ida_destroy(&root->ino_ida);
609 610 611 612
		kfree(root);
		return ERR_PTR(-ENOMEM);
	}

T
Tejun Heo 已提交
613
	atomic_sub(KN_DEACTIVATED_BIAS, &kn->active);
614
	kn->priv = priv;
615
	kn->dir.root = root;
616

T
Tejun Heo 已提交
617
	root->dir_ops = kdops;
618
	root->kn = kn;
619
	init_waitqueue_head(&root->deactivate_waitq);
620 621 622 623 624 625 626 627 628 629 630 631 632

	return root;
}

/**
 * kernfs_destroy_root - destroy a kernfs hierarchy
 * @root: root of the hierarchy to destroy
 *
 * Destroy the hierarchy anchored at @root by removing all existing
 * directories and destroying @root.
 */
void kernfs_destroy_root(struct kernfs_root *root)
{
633
	kernfs_remove(root->kn);	/* will also free @root */
634 635
}

636 637 638 639
/**
 * kernfs_create_dir_ns - create a directory
 * @parent: parent in which to create a new directory
 * @name: name of the new directory
640
 * @mode: mode of the new directory
641 642 643 644 645
 * @priv: opaque data associated with the new directory
 * @ns: optional namespace tag of the directory
 *
 * Returns the created node on success, ERR_PTR() value on failure.
 */
646
struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
647 648
					 const char *name, umode_t mode,
					 void *priv, const void *ns)
649
{
650
	struct kernfs_addrm_cxt acxt;
651
	struct kernfs_node *kn;
652 653 654
	int rc;

	/* allocate */
655 656
	kn = kernfs_new_node(kernfs_root(parent), name, mode | S_IFDIR,
			     KERNFS_DIR);
657
	if (!kn)
658 659
		return ERR_PTR(-ENOMEM);

660 661
	kn->dir.root = parent->dir.root;
	kn->ns = ns;
662
	kn->priv = priv;
663 664

	/* link in */
T
Tejun Heo 已提交
665 666 667 668 669 670 671
	rc = -ENOENT;
	if (kernfs_get_active(parent)) {
		kernfs_addrm_start(&acxt);
		rc = kernfs_add_one(&acxt, kn, parent);
		kernfs_addrm_finish(&acxt);
		kernfs_put_active(parent);
	}
672 673

	if (!rc)
674
		return kn;
675

676
	kernfs_put(kn);
677 678 679
	return ERR_PTR(rc);
}

680 681 682
static struct dentry *kernfs_iop_lookup(struct inode *dir,
					struct dentry *dentry,
					unsigned int flags)
683
{
T
Tejun Heo 已提交
684
	struct dentry *ret;
685 686
	struct kernfs_node *parent = dentry->d_parent->d_fsdata;
	struct kernfs_node *kn;
687 688 689
	struct inode *inode;
	const void *ns = NULL;

690
	mutex_lock(&kernfs_mutex);
691

692
	if (kernfs_ns_enabled(parent))
693
		ns = kernfs_info(dir->i_sb)->ns;
694

695
	kn = kernfs_find_ns(parent, dentry->d_name.name, ns);
696 697

	/* no such entry */
698
	if (!kn) {
T
Tejun Heo 已提交
699
		ret = NULL;
700 701
		goto out_unlock;
	}
702 703
	kernfs_get(kn);
	dentry->d_fsdata = kn;
704 705

	/* attach dentry and inode */
706
	inode = kernfs_get_inode(dir->i_sb, kn);
707 708 709 710 711 712 713 714
	if (!inode) {
		ret = ERR_PTR(-ENOMEM);
		goto out_unlock;
	}

	/* instantiate and hash dentry */
	ret = d_materialise_unique(dentry, inode);
 out_unlock:
715
	mutex_unlock(&kernfs_mutex);
716 717 718
	return ret;
}

T
Tejun Heo 已提交
719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754
/*
 * Inode ->mkdir callback.  Delegates to the hierarchy's dir_ops->mkdir;
 * returns -EPERM when no such operation is registered.
 */
static int kernfs_iop_mkdir(struct inode *dir, struct dentry *dentry,
			    umode_t mode)
{
	struct kernfs_node *parent = dir->i_private;
	struct kernfs_dir_ops *kdops = kernfs_root(parent)->dir_ops;

	if (kdops && kdops->mkdir)
		return kdops->mkdir(parent, dentry->d_name.name, mode);

	return -EPERM;
}

/*
 * Inode ->rmdir callback.  Delegates to the hierarchy's dir_ops->rmdir;
 * returns -EPERM when no such operation is registered.
 */
static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry)
{
	struct kernfs_node *kn = dentry->d_fsdata;
	struct kernfs_dir_ops *kdops = kernfs_root(kn)->dir_ops;

	if (kdops && kdops->rmdir)
		return kdops->rmdir(kn);

	return -EPERM;
}

/*
 * Inode ->rename callback.  Delegates to the hierarchy's dir_ops->rename;
 * returns -EPERM when no such operation is registered.
 */
static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry,
			     struct inode *new_dir, struct dentry *new_dentry)
{
	struct kernfs_node *kn = old_dentry->d_fsdata;
	struct kernfs_node *new_parent = new_dir->i_private;
	struct kernfs_dir_ops *kdops = kernfs_root(kn)->dir_ops;

	if (kdops && kdops->rename)
		return kdops->rename(kn, new_parent, new_dentry->d_name.name);

	return -EPERM;
}

755
const struct inode_operations kernfs_dir_iops = {
756 757 758 759 760 761 762 763
	.lookup		= kernfs_iop_lookup,
	.permission	= kernfs_iop_permission,
	.setattr	= kernfs_iop_setattr,
	.getattr	= kernfs_iop_getattr,
	.setxattr	= kernfs_iop_setxattr,
	.removexattr	= kernfs_iop_removexattr,
	.getxattr	= kernfs_iop_getxattr,
	.listxattr	= kernfs_iop_listxattr,
T
Tejun Heo 已提交
764 765 766 767

	.mkdir		= kernfs_iop_mkdir,
	.rmdir		= kernfs_iop_rmdir,
	.rename		= kernfs_iop_rename,
768 769
};

770
static struct kernfs_node *kernfs_leftmost_descendant(struct kernfs_node *pos)
771
{
772
	struct kernfs_node *last;
773 774 775 776 777 778

	while (true) {
		struct rb_node *rbn;

		last = pos;

T
Tejun Heo 已提交
779
		if (kernfs_type(pos) != KERNFS_DIR)
780 781
			break;

782
		rbn = rb_first(&pos->dir.children);
783 784 785
		if (!rbn)
			break;

786
		pos = rb_to_kn(rbn);
787 788 789 790 791 792
	}

	return last;
}

/**
793
 * kernfs_next_descendant_post - find the next descendant for post-order walk
794
 * @pos: the current position (%NULL to initiate traversal)
795
 * @root: kernfs_node whose descendants to walk
796 797 798 799 800
 *
 * Find the next descendant to visit for post-order traversal of @root's
 * descendants.  @root is included in the iteration and the last node to be
 * visited.
 */
801 802
static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
						       struct kernfs_node *root)
803 804 805
{
	struct rb_node *rbn;

806
	lockdep_assert_held(&kernfs_mutex);
807 808 809

	/* if first iteration, visit leftmost descendant which may be root */
	if (!pos)
810
		return kernfs_leftmost_descendant(root);
811 812 813 814 815 816

	/* if we visited @root, we're done */
	if (pos == root)
		return NULL;

	/* if there's an unvisited sibling, visit its leftmost descendant */
817
	rbn = rb_next(&pos->rb);
818
	if (rbn)
819
		return kernfs_leftmost_descendant(rb_to_kn(rbn));
820 821

	/* no sibling left, visit parent */
822
	return pos->parent;
823 824
}

825
static void __kernfs_remove(struct kernfs_addrm_cxt *acxt,
826
			    struct kernfs_node *kn)
827
{
828
	struct kernfs_node *pos, *next;
829

830
	if (!kn)
831 832
		return;

833
	pr_debug("kernfs %s: removing\n", kn->name);
834 835 836 837

	next = NULL;
	do {
		pos = next;
838
		next = kernfs_next_descendant_post(pos, kn);
839
		if (pos)
840
			kernfs_remove_one(acxt, pos);
841 842 843 844
	} while (next);
}

/**
845 846
 * kernfs_remove - remove a kernfs_node recursively
 * @kn: the kernfs_node to remove
847
 *
848
 * Remove @kn along with all its subdirectories and files.
849
 */
850
void kernfs_remove(struct kernfs_node *kn)
851
{
852
	struct kernfs_addrm_cxt acxt;
853

854
	kernfs_addrm_start(&acxt);
855
	__kernfs_remove(&acxt, kn);
856
	kernfs_addrm_finish(&acxt);
857 858 859
}

/**
860 861 862 863
 * kernfs_remove_by_name_ns - find a kernfs_node by name and remove it
 * @parent: parent of the target
 * @name: name of the kernfs_node to remove
 * @ns: namespace tag of the kernfs_node to remove
864
 *
865 866
 * Look for the kernfs_node with @name and @ns under @parent and remove it.
 * Returns 0 on success, -ENOENT if such entry doesn't exist.
867
 */
868
int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
869 870
			     const void *ns)
{
871
	struct kernfs_addrm_cxt acxt;
872
	struct kernfs_node *kn;
873

874
	if (!parent) {
875
		WARN(1, KERN_WARNING "kernfs: can not remove '%s', no directory\n",
876 877 878 879
			name);
		return -ENOENT;
	}

880
	kernfs_addrm_start(&acxt);
881

882 883 884
	kn = kernfs_find_ns(parent, name, ns);
	if (kn)
		__kernfs_remove(&acxt, kn);
885

886
	kernfs_addrm_finish(&acxt);
887

888
	if (kn)
889 890 891 892 893 894 895
		return 0;
	else
		return -ENOENT;
}

/**
 * kernfs_rename_ns - move and rename a kernfs_node
896
 * @kn: target node
897 898 899 900
 * @new_parent: new parent to put @sd under
 * @new_name: new name
 * @new_ns: new namespace tag
 */
901
int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
902 903 904 905
		     const char *new_name, const void *new_ns)
{
	int error;

906
	error = -ENOENT;
T
Tejun Heo 已提交
907
	if (!kernfs_get_active(new_parent))
908
		goto out;
T
Tejun Heo 已提交
909 910 911 912
	if (!kernfs_get_active(kn))
		goto out_put_new_parent;

	mutex_lock(&kernfs_mutex);
913

914
	error = 0;
915 916
	if ((kn->parent == new_parent) && (kn->ns == new_ns) &&
	    (strcmp(kn->name, new_name) == 0))
T
Tejun Heo 已提交
917
		goto out_unlock;	/* nothing to rename */
918 919 920

	error = -EEXIST;
	if (kernfs_find_ns(new_parent, new_name, new_ns))
T
Tejun Heo 已提交
921
		goto out_unlock;
922

923
	/* rename kernfs_node */
924
	if (strcmp(kn->name, new_name) != 0) {
925 926 927
		error = -ENOMEM;
		new_name = kstrdup(new_name, GFP_KERNEL);
		if (!new_name)
T
Tejun Heo 已提交
928
			goto out_unlock;
929

930 931 932 933 934
		if (kn->flags & KERNFS_STATIC_NAME)
			kn->flags &= ~KERNFS_STATIC_NAME;
		else
			kfree(kn->name);

935
		kn->name = new_name;
936 937 938 939 940
	}

	/*
	 * Move to the appropriate place in the appropriate directories rbtree.
	 */
941
	kernfs_unlink_sibling(kn);
942
	kernfs_get(new_parent);
943 944
	kernfs_put(kn->parent);
	kn->ns = new_ns;
945
	kn->hash = kernfs_name_hash(kn->name, kn->ns);
946
	kn->parent = new_parent;
947
	kernfs_link_sibling(kn);
948 949

	error = 0;
T
Tejun Heo 已提交
950
out_unlock:
951
	mutex_unlock(&kernfs_mutex);
T
Tejun Heo 已提交
952 953 954 955
	kernfs_put_active(kn);
out_put_new_parent:
	kernfs_put_active(new_parent);
out:
956 957 958 959
	return error;
}

/* Relationship between s_mode and the DT_xxx types */
960
static inline unsigned char dt_type(struct kernfs_node *kn)
961
{
962
	return (kn->mode >> 12) & 15;
963 964
}

965
static int kernfs_dir_fop_release(struct inode *inode, struct file *filp)
966 967 968 969 970
{
	kernfs_put(filp->private_data);
	return 0;
}

971
/*
 * Resolve the readdir position (@hash, cached node @pos) under @parent to
 * a node in namespace @ns.
 *
 * The reference on a cached @pos is always dropped.  It is reused only if
 * it is still a child of @parent with a hash equal to @hash; otherwise the
 * children tree is searched by hash.  Entries whose namespace tag differs
 * from @ns are then skipped.  Returns the resulting node or NULL.
 */
static struct kernfs_node *kernfs_dir_pos(const void *ns,
	struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos)
{
	struct rb_node *node;

	if (pos) {
		int stale = pos->parent != parent || hash != pos->hash;

		kernfs_put(pos);
		if (stale)
			pos = NULL;
	}

	if (!pos && (hash > 1) && (hash < INT_MAX)) {
		/*
		 * On a miss @pos is left at the last node examined, so the
		 * walk below continues from there.
		 */
		node = parent->dir.children.rb_node;
		while (node) {
			pos = rb_to_kn(node);

			if (hash == pos->hash)
				break;
			node = hash < pos->hash ? node->rb_left
						: node->rb_right;
		}
	}

	/* Skip over entries in the wrong namespace */
	while (pos && pos->ns != ns) {
		node = rb_next(&pos->rb);
		pos = node ? rb_to_kn(node) : NULL;
	}

	return pos;
}

1004
/*
 * Resolve the current position via kernfs_dir_pos() and step to the next
 * sibling whose namespace tag equals @ns.  @ino is forwarded unchanged as
 * the position argument of kernfs_dir_pos().  Returns NULL when there is
 * no further entry.
 */
static struct kernfs_node *kernfs_dir_next_pos(const void *ns,
	struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos)
{
	pos = kernfs_dir_pos(ns, parent, ino, pos);

	while (pos) {
		struct rb_node *node = rb_next(&pos->rb);

		pos = node ? rb_to_kn(node) : NULL;
		if (pos && pos->ns == ns)
			break;
	}

	return pos;
}

1019
static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
1020 1021
{
	struct dentry *dentry = file->f_path.dentry;
1022 1023
	struct kernfs_node *parent = dentry->d_fsdata;
	struct kernfs_node *pos = file->private_data;
1024 1025 1026 1027
	const void *ns = NULL;

	if (!dir_emit_dots(file, ctx))
		return 0;
1028
	mutex_lock(&kernfs_mutex);
1029

1030
	if (kernfs_ns_enabled(parent))
1031
		ns = kernfs_info(dentry->d_sb)->ns;
1032

1033
	for (pos = kernfs_dir_pos(ns, parent, ctx->pos, pos);
1034
	     pos;
1035
	     pos = kernfs_dir_next_pos(ns, parent, ctx->pos, pos)) {
1036
		const char *name = pos->name;
1037 1038
		unsigned int type = dt_type(pos);
		int len = strlen(name);
1039
		ino_t ino = pos->ino;
1040

1041
		ctx->pos = pos->hash;
1042 1043 1044
		file->private_data = pos;
		kernfs_get(pos);

1045
		mutex_unlock(&kernfs_mutex);
1046 1047
		if (!dir_emit(ctx, name, len, ino, type))
			return 0;
1048
		mutex_lock(&kernfs_mutex);
1049
	}
1050
	mutex_unlock(&kernfs_mutex);
1051 1052 1053 1054 1055
	file->private_data = NULL;
	ctx->pos = INT_MAX;
	return 0;
}

1056 1057
/*
 * Directory ->llseek callback: generic_file_llseek() performed with the
 * inode's i_mutex held.
 */
static loff_t kernfs_dir_fop_llseek(struct file *file, loff_t offset,
				    int whence)
{
	struct inode *dir_inode = file_inode(file);
	loff_t new_pos;

	mutex_lock(&dir_inode->i_mutex);
	new_pos = generic_file_llseek(file, offset, whence);
	mutex_unlock(&dir_inode->i_mutex);

	return new_pos;
}

1069
const struct file_operations kernfs_dir_fops = {
1070
	.read		= generic_read_dir,
1071 1072 1073
	.iterate	= kernfs_fop_readdir,
	.release	= kernfs_dir_fop_release,
	.llseek		= kernfs_dir_fop_llseek,
1074
};