dir.c 24.7 KB
Newer Older
1 2 3 4 5 6 7 8 9
/*
 * fs/kernfs/dir.c - kernfs directory implementation
 *
 * Copyright (c) 2001-3 Patrick Mochel
 * Copyright (c) 2007 SUSE Linux Products GmbH
 * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
 *
 * This file is released under the GPLv2.
 */
10 11 12 13 14 15 16 17 18 19

#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/idr.h>
#include <linux/slab.h>
#include <linux/security.h>
#include <linux/hash.h>

#include "kernfs-internal.h"

20
/*
 * Single global mutex taken by the lookup, add/remove, rename and readdir
 * paths in this file.
 */
DEFINE_MUTEX(kernfs_mutex);

/* Map an rb_node embedded in a kernfs_node (member ->rb) back to the node. */
#define rb_to_kn(rbn) rb_entry((rbn), struct kernfs_node, rb)
23 24

/**
 *	kernfs_name_hash - hash a (namespace tag, name) pair
 *	@name: Null terminated string to hash
 *	@ns:   Namespace tag to hash
 *
 *	Returns 31 bit hash of ns + name (so it fits in an off_t ).
 *
 *	The values 0, 1 and INT_MAX are never returned: kernfs_dir_pos() only
 *	looks up positions strictly between 1 and INT_MAX, and
 *	kernfs_fop_readdir() stores INT_MAX as the end-of-directory position.
 */
static unsigned int kernfs_name_hash(const char *name, const void *ns)
{
	unsigned long hash = init_name_hash();
	unsigned int len = strlen(name);

	while (len--)
		hash = partial_name_hash(*name++, hash);
	hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31));
	hash &= 0x7fffffffU;

	/*
	 * Reserve hash numbers 0, 1 and INT_MAX for magic directory entries.
	 * Both 0 and 1 must be moved out of the way, hence "< 2"; testing
	 * "< 1" would let a regular entry end up with the reserved hash 1.
	 */
	if (hash < 2)
		hash += 2;
	if (hash >= INT_MAX)
		hash = INT_MAX - 1;
	return hash;
}

47 48
static int kernfs_name_compare(unsigned int hash, const char *name,
			       const void *ns, const struct kernfs_node *kn)
49
{
50 51 52 53 54
	if (hash != kn->hash)
		return hash - kn->hash;
	if (ns != kn->ns)
		return ns - kn->ns;
	return strcmp(name, kn->name);
55 56
}

57 58
static int kernfs_sd_compare(const struct kernfs_node *left,
			     const struct kernfs_node *right)
59
{
60
	return kernfs_name_compare(left->hash, left->name, left->ns, right);
61 62 63
}

/**
64
 *	kernfs_link_sibling - link kernfs_node into sibling rbtree
65
 *	@kn: kernfs_node of interest
66
 *
67
 *	Link @kn into its sibling rbtree which starts from
68
 *	@kn->parent->dir.children.
69 70
 *
 *	Locking:
71
 *	mutex_lock(kernfs_mutex)
72 73 74 75
 *
 *	RETURNS:
 *	0 on susccess -EEXIST on failure.
 */
76
static int kernfs_link_sibling(struct kernfs_node *kn)
77
{
78
	struct rb_node **node = &kn->parent->dir.children.rb_node;
79 80
	struct rb_node *parent = NULL;

T
Tejun Heo 已提交
81
	if (kernfs_type(kn) == KERNFS_DIR)
82
		kn->parent->dir.subdirs++;
83 84

	while (*node) {
85
		struct kernfs_node *pos;
86 87
		int result;

88
		pos = rb_to_kn(*node);
89
		parent = *node;
90
		result = kernfs_sd_compare(kn, pos);
91
		if (result < 0)
92
			node = &pos->rb.rb_left;
93
		else if (result > 0)
94
			node = &pos->rb.rb_right;
95 96 97 98
		else
			return -EEXIST;
	}
	/* add new node and rebalance the tree */
99 100
	rb_link_node(&kn->rb, parent, node);
	rb_insert_color(&kn->rb, &kn->parent->dir.children);
101 102 103 104
	return 0;
}

/**
105
 *	kernfs_unlink_sibling - unlink kernfs_node from sibling rbtree
106
 *	@kn: kernfs_node of interest
107
 *
108
 *	Unlink @kn from its sibling rbtree which starts from
109
 *	kn->parent->dir.children.
110 111
 *
 *	Locking:
112
 *	mutex_lock(kernfs_mutex)
113
 */
114
static void kernfs_unlink_sibling(struct kernfs_node *kn)
115
{
T
Tejun Heo 已提交
116
	if (kernfs_type(kn) == KERNFS_DIR)
117
		kn->parent->dir.subdirs--;
118

119
	rb_erase(&kn->rb, &kn->parent->dir.children);
120 121 122
}

/**
123
 *	kernfs_get_active - get an active reference to kernfs_node
124
 *	@kn: kernfs_node to get an active reference to
125
 *
126
 *	Get an active reference of @kn.  This function is noop if @kn
127 128 129
 *	is NULL.
 *
 *	RETURNS:
130
 *	Pointer to @kn on success, NULL on failure.
131
 */
132
struct kernfs_node *kernfs_get_active(struct kernfs_node *kn)
133
{
134
	if (unlikely(!kn))
135 136
		return NULL;

137
	if (!atomic_inc_unless_negative(&kn->active))
138 139
		return NULL;

T
Tejun Heo 已提交
140
	if (kn->flags & KERNFS_LOCKDEP)
141 142
		rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_);
	return kn;
143 144 145
}

/**
146
 *	kernfs_put_active - put an active reference to kernfs_node
147
 *	@kn: kernfs_node to put an active reference to
148
 *
149
 *	Put an active reference to @kn.  This function is noop if @kn
150 151
 *	is NULL.
 */
152
void kernfs_put_active(struct kernfs_node *kn)
153 154 155
{
	int v;

156
	if (unlikely(!kn))
157 158
		return;

T
Tejun Heo 已提交
159
	if (kn->flags & KERNFS_LOCKDEP)
160
		rwsem_release(&kn->dep_map, 1, _RET_IP_);
161
	v = atomic_dec_return(&kn->active);
T
Tejun Heo 已提交
162
	if (likely(v != KN_DEACTIVATED_BIAS))
163 164
		return;

165 166 167
	/*
	 * atomic_dec_return() is a mb(), we'll always see the updated
	 * kn->u.completion.
168
	 */
169
	complete(kn->u.completion);
170 171 172
}

/**
173
 *	kernfs_deactivate - deactivate kernfs_node
174
 *	@kn: kernfs_node to deactivate
175 176 177
 *
 *	Deny new active references and drain existing ones.
 */
178
static void kernfs_deactivate(struct kernfs_node *kn)
179 180 181 182
{
	DECLARE_COMPLETION_ONSTACK(wait);
	int v;

T
Tejun Heo 已提交
183
	BUG_ON(!(kn->flags & KERNFS_REMOVED));
184

T
Tejun Heo 已提交
185
	if (!(kernfs_type(kn) & KERNFS_ACTIVE_REF))
186 187
		return;

188
	kn->u.completion = (void *)&wait;
189

190
	rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_);
191
	/* atomic_add_return() is a mb(), put_active() will always see
192
	 * the updated kn->u.completion.
193
	 */
T
Tejun Heo 已提交
194
	v = atomic_add_return(KN_DEACTIVATED_BIAS, &kn->active);
195

T
Tejun Heo 已提交
196
	if (v != KN_DEACTIVATED_BIAS) {
197
		lock_contended(&kn->dep_map, _RET_IP_);
198 199 200
		wait_for_completion(&wait);
	}

201 202
	lock_acquired(&kn->dep_map, _RET_IP_);
	rwsem_release(&kn->dep_map, 1, _RET_IP_);
203 204 205
}

/**
206 207
 * kernfs_get - get a reference count on a kernfs_node
 * @kn: the target kernfs_node
208
 */
209
void kernfs_get(struct kernfs_node *kn)
210
{
211
	if (kn) {
212 213
		WARN_ON(!atomic_read(&kn->count));
		atomic_inc(&kn->count);
214 215 216 217 218
	}
}
EXPORT_SYMBOL_GPL(kernfs_get);

/**
219 220
 * kernfs_put - put a reference count on a kernfs_node
 * @kn: the target kernfs_node
221
 *
222
 * Put a reference count of @kn and destroy it if it reached zero.
223
 */
224
void kernfs_put(struct kernfs_node *kn)
225
{
226
	struct kernfs_node *parent;
227
	struct kernfs_root *root;
228

229
	if (!kn || !atomic_dec_and_test(&kn->count))
230
		return;
231
	root = kernfs_root(kn);
232 233
 repeat:
	/* Moving/renaming is always done while holding reference.
234
	 * kn->parent won't change beneath us.
235
	 */
236
	parent = kn->parent;
237

238 239
	WARN(!(kn->flags & KERNFS_REMOVED), "kernfs: free using entry: %s/%s\n",
	     parent ? parent->name : "", kn->name);
240

T
Tejun Heo 已提交
241
	if (kernfs_type(kn) == KERNFS_LINK)
242
		kernfs_put(kn->symlink.target_kn);
243
	if (!(kn->flags & KERNFS_STATIC_NAME))
244 245 246 247 248 249
		kfree(kn->name);
	if (kn->iattr) {
		if (kn->iattr->ia_secdata)
			security_release_secctx(kn->iattr->ia_secdata,
						kn->iattr->ia_secdata_len);
		simple_xattrs_free(&kn->iattr->xattrs);
250
	}
251 252
	kfree(kn->iattr);
	ida_simple_remove(&root->ino_ida, kn->ino);
253
	kmem_cache_free(kernfs_node_cache, kn);
254

255 256
	kn = parent;
	if (kn) {
257
		if (atomic_dec_and_test(&kn->count))
258 259
			goto repeat;
	} else {
260
		/* just released the root kn, free @root too */
261
		ida_destroy(&root->ino_ida);
262 263
		kfree(root);
	}
264 265 266
}
EXPORT_SYMBOL_GPL(kernfs_put);

267
static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
268
{
269
	struct kernfs_node *kn;
270 271 272 273

	if (flags & LOOKUP_RCU)
		return -ECHILD;

T
Tejun Heo 已提交
274 275 276 277
	/* Always perform fresh lookup for negatives */
	if (!dentry->d_inode)
		goto out_bad_unlocked;

278
	kn = dentry->d_fsdata;
279
	mutex_lock(&kernfs_mutex);
280

281
	/* The kernfs node has been deleted */
T
Tejun Heo 已提交
282
	if (kn->flags & KERNFS_REMOVED)
283 284
		goto out_bad;

285
	/* The kernfs node has been moved? */
286
	if (dentry->d_parent->d_fsdata != kn->parent)
287 288
		goto out_bad;

289
	/* The kernfs node has been renamed */
290
	if (strcmp(dentry->d_name.name, kn->name) != 0)
291 292
		goto out_bad;

293
	/* The kernfs node has been moved to a different namespace */
294
	if (kn->parent && kernfs_ns_enabled(kn->parent) &&
295
	    kernfs_info(dentry->d_sb)->ns != kn->ns)
296 297
		goto out_bad;

298
	mutex_unlock(&kernfs_mutex);
299 300 301
out_valid:
	return 1;
out_bad:
302
	mutex_unlock(&kernfs_mutex);
T
Tejun Heo 已提交
303 304 305 306 307 308 309
out_bad_unlocked:
	/*
	 * @dentry doesn't match the underlying kernfs node, drop the
	 * dentry and force lookup.  If we have submounts we must allow the
	 * vfs caches to lie about the state of the filesystem to prevent
	 * leaks and other nasty things, so use check_submounts_and_drop()
	 * instead of d_drop().
310 311 312 313 314 315 316
	 */
	if (check_submounts_and_drop(dentry) != 0)
		goto out_valid;

	return 0;
}

317
static void kernfs_dop_release(struct dentry *dentry)
318 319 320 321
{
	kernfs_put(dentry->d_fsdata);
}

322
const struct dentry_operations kernfs_dops = {
323 324
	.d_revalidate	= kernfs_dop_revalidate,
	.d_release	= kernfs_dop_release,
325 326
};

327
struct kernfs_node *kernfs_new_node(struct kernfs_root *root, const char *name,
328
				    umode_t mode, unsigned flags)
329 330
{
	char *dup_name = NULL;
331
	struct kernfs_node *kn;
332
	int ret;
333

334
	if (!(flags & KERNFS_STATIC_NAME)) {
335 336 337 338 339
		name = dup_name = kstrdup(name, GFP_KERNEL);
		if (!name)
			return NULL;
	}

340
	kn = kmem_cache_zalloc(kernfs_node_cache, GFP_KERNEL);
341
	if (!kn)
342 343
		goto err_out1;

344 345
	ret = ida_simple_get(&root->ino_ida, 1, 0, GFP_KERNEL);
	if (ret < 0)
346
		goto err_out2;
347
	kn->ino = ret;
348

349 350
	atomic_set(&kn->count, 1);
	atomic_set(&kn->active, 0);
351

352 353
	kn->name = name;
	kn->mode = mode;
354
	kn->flags = flags | KERNFS_REMOVED;
355

356
	return kn;
357 358

 err_out2:
359
	kmem_cache_free(kernfs_node_cache, kn);
360 361 362 363 364 365
 err_out1:
	kfree(dup_name);
	return NULL;
}

/**
366
 *	kernfs_addrm_start - prepare for kernfs_node add/remove
367
 *	@acxt: pointer to kernfs_addrm_cxt to be used
368 369
 *
 *	This function is called when the caller is about to add or remove
370 371
 *	kernfs_node.  This function acquires kernfs_mutex.  @acxt is used
 *	to keep and pass context to other addrm functions.
372 373
 *
 *	LOCKING:
374
 *	Kernel thread context (may sleep).  kernfs_mutex is locked on
375 376
 *	return.
 */
377
void kernfs_addrm_start(struct kernfs_addrm_cxt *acxt)
378
	__acquires(kernfs_mutex)
379 380 381
{
	memset(acxt, 0, sizeof(*acxt));

382
	mutex_lock(&kernfs_mutex);
383 384 385
}

/**
386
 *	kernfs_add_one - add kernfs_node to parent without warning
387
 *	@acxt: addrm context to use
388 389
 *	@kn: kernfs_node to be added
 *	@parent: the parent kernfs_node to add @kn to
390
 *
391 392 393
 *	Get @parent and set @kn->parent to it and increment nlink of the
 *	parent inode if @kn is a directory and link into the children list
 *	of the parent.
394 395
 *
 *	This function should be called between calls to
396 397
 *	kernfs_addrm_start() and kernfs_addrm_finish() and should be passed
 *	the same @acxt as passed to kernfs_addrm_start().
398 399
 *
 *	LOCKING:
400
 *	Determined by kernfs_addrm_start().
401 402 403 404 405
 *
 *	RETURNS:
 *	0 on success, -EEXIST if entry with the given name already
 *	exists.
 */
406
int kernfs_add_one(struct kernfs_addrm_cxt *acxt, struct kernfs_node *kn,
407
		  struct kernfs_node *parent)
408
{
409
	bool has_ns = kernfs_ns_enabled(parent);
410
	struct kernfs_iattrs *ps_iattr;
411 412
	int ret;

413
	if (has_ns != (bool)kn->ns) {
414
		WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
415
		     has_ns ? "required" : "invalid", parent->name, kn->name);
416 417 418
		return -EINVAL;
	}

T
Tejun Heo 已提交
419
	if (kernfs_type(parent) != KERNFS_DIR)
420 421
		return -EINVAL;

422 423 424
	if (parent->flags & KERNFS_REMOVED)
		return -ENOENT;

425
	kn->hash = kernfs_name_hash(kn->name, kn->ns);
426
	kn->parent = parent;
427
	kernfs_get(parent);
428

429
	ret = kernfs_link_sibling(kn);
430 431 432 433
	if (ret)
		return ret;

	/* Update timestamps on the parent */
434
	ps_iattr = parent->iattr;
435 436 437 438 439 440
	if (ps_iattr) {
		struct iattr *ps_iattrs = &ps_iattr->ia_iattr;
		ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME;
	}

	/* Mark the entry added into directory tree */
T
Tejun Heo 已提交
441
	kn->flags &= ~KERNFS_REMOVED;
442 443 444 445 446

	return 0;
}

/**
447
 *	kernfs_remove_one - remove kernfs_node from parent
448
 *	@acxt: addrm context to use
449
 *	@kn: kernfs_node to be removed
450
 *
451 452
 *	Mark @kn removed and drop nlink of parent inode if @kn is a
 *	directory.  @kn is unlinked from the children list.
453 454
 *
 *	This function should be called between calls to
455 456
 *	kernfs_addrm_start() and kernfs_addrm_finish() and should be
 *	passed the same @acxt as passed to kernfs_addrm_start().
457 458
 *
 *	LOCKING:
459
 *	Determined by kernfs_addrm_start().
460
 */
461 462
static void kernfs_remove_one(struct kernfs_addrm_cxt *acxt,
			      struct kernfs_node *kn)
463
{
464
	struct kernfs_iattrs *ps_iattr;
465 466 467 468 469

	/*
	 * Removal can be called multiple times on the same node.  Only the
	 * first invocation is effective and puts the base ref.
	 */
T
Tejun Heo 已提交
470
	if (kn->flags & KERNFS_REMOVED)
471 472
		return;

473
	if (kn->parent) {
474
		kernfs_unlink_sibling(kn);
475

476
		/* Update timestamps on the parent */
477
		ps_iattr = kn->parent->iattr;
478 479 480 481
		if (ps_iattr) {
			ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME;
			ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME;
		}
482 483
	}

T
Tejun Heo 已提交
484
	kn->flags |= KERNFS_REMOVED;
485 486
	kn->u.removed_list = acxt->removed;
	acxt->removed = kn;
487 488 489
}

/**
490
 *	kernfs_addrm_finish - finish up kernfs_node add/remove
491 492
 *	@acxt: addrm context to finish up
 *
493
 *	Finish up kernfs_node add/remove.  Resources acquired by
494
 *	kernfs_addrm_start() are released and removed kernfs_nodes are
495 496 497
 *	cleaned up.
 *
 *	LOCKING:
498
 *	kernfs_mutex is released.
499
 */
500
void kernfs_addrm_finish(struct kernfs_addrm_cxt *acxt)
501
	__releases(kernfs_mutex)
502
{
503
	/* release resources acquired by kernfs_addrm_start() */
504
	mutex_unlock(&kernfs_mutex);
505

506
	/* kill removed kernfs_nodes */
507
	while (acxt->removed) {
508
		struct kernfs_node *kn = acxt->removed;
509

510
		acxt->removed = kn->u.removed_list;
511

512 513
		kernfs_deactivate(kn);
		kernfs_unmap_bin_file(kn);
514
		kernfs_put(kn);
515 516 517 518
	}
}

/**
519 520
 * kernfs_find_ns - find kernfs_node with the given name
 * @parent: kernfs_node to search under
521 522 523
 * @name: name to look for
 * @ns: the namespace tag to use
 *
524 525
 * Look for kernfs_node with name @name under @parent.  Returns pointer to
 * the found kernfs_node on success, %NULL on failure.
526
 */
527 528 529
static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent,
					  const unsigned char *name,
					  const void *ns)
530
{
531
	struct rb_node *node = parent->dir.children.rb_node;
532
	bool has_ns = kernfs_ns_enabled(parent);
533 534
	unsigned int hash;

535
	lockdep_assert_held(&kernfs_mutex);
536 537

	if (has_ns != (bool)ns) {
538
		WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
539
		     has_ns ? "required" : "invalid", parent->name, name);
540 541 542
		return NULL;
	}

543
	hash = kernfs_name_hash(name, ns);
544
	while (node) {
545
		struct kernfs_node *kn;
546 547
		int result;

548
		kn = rb_to_kn(node);
549
		result = kernfs_name_compare(hash, name, ns, kn);
550 551 552 553 554
		if (result < 0)
			node = node->rb_left;
		else if (result > 0)
			node = node->rb_right;
		else
555
			return kn;
556 557 558 559 560
	}
	return NULL;
}

/**
561 562
 * kernfs_find_and_get_ns - find and get kernfs_node with the given name
 * @parent: kernfs_node to search under
563 564 565
 * @name: name to look for
 * @ns: the namespace tag to use
 *
566
 * Look for kernfs_node with name @name under @parent and get a reference
567
 * if found.  This function may sleep and returns pointer to the found
568
 * kernfs_node on success, %NULL on failure.
569
 */
570 571
struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent,
					   const char *name, const void *ns)
572
{
573
	struct kernfs_node *kn;
574

575
	mutex_lock(&kernfs_mutex);
576 577
	kn = kernfs_find_ns(parent, name, ns);
	kernfs_get(kn);
578
	mutex_unlock(&kernfs_mutex);
579

580
	return kn;
581 582 583
}
EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns);

584 585
/**
 * kernfs_create_root - create a new kernfs hierarchy
T
Tejun Heo 已提交
586
 * @kdops: optional directory syscall operations for the hierarchy
587 588 589 590 591
 * @priv: opaque data associated with the new directory
 *
 * Returns the root of the new hierarchy on success, ERR_PTR() value on
 * failure.
 */
T
Tejun Heo 已提交
592
struct kernfs_root *kernfs_create_root(struct kernfs_dir_ops *kdops, void *priv)
593 594
{
	struct kernfs_root *root;
595
	struct kernfs_node *kn;
596 597 598 599 600

	root = kzalloc(sizeof(*root), GFP_KERNEL);
	if (!root)
		return ERR_PTR(-ENOMEM);

601 602
	ida_init(&root->ino_ida);

603
	kn = kernfs_new_node(root, "", S_IFDIR | S_IRUGO | S_IXUGO, KERNFS_DIR);
604
	if (!kn) {
605
		ida_destroy(&root->ino_ida);
606 607 608 609
		kfree(root);
		return ERR_PTR(-ENOMEM);
	}

T
Tejun Heo 已提交
610
	kn->flags &= ~KERNFS_REMOVED;
611
	kn->priv = priv;
612
	kn->dir.root = root;
613

T
Tejun Heo 已提交
614
	root->dir_ops = kdops;
615
	root->kn = kn;
616 617 618 619 620 621 622 623 624 625 626 627 628

	return root;
}

/**
 * kernfs_destroy_root - destroy a kernfs hierarchy
 * @root: root of the hierarchy to destroy
 *
 * Destroy the hierarchy anchored at @root by removing all existing
 * directories and destroying @root.
 */
void kernfs_destroy_root(struct kernfs_root *root)
{
629
	kernfs_remove(root->kn);	/* will also free @root */
630 631
}

632 633 634 635
/**
 * kernfs_create_dir_ns - create a directory
 * @parent: parent in which to create a new directory
 * @name: name of the new directory
636
 * @mode: mode of the new directory
637 638 639 640 641
 * @priv: opaque data associated with the new directory
 * @ns: optional namespace tag of the directory
 *
 * Returns the created node on success, ERR_PTR() value on failure.
 */
642
struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
643 644
					 const char *name, umode_t mode,
					 void *priv, const void *ns)
645
{
646
	struct kernfs_addrm_cxt acxt;
647
	struct kernfs_node *kn;
648 649 650
	int rc;

	/* allocate */
651 652
	kn = kernfs_new_node(kernfs_root(parent), name, mode | S_IFDIR,
			     KERNFS_DIR);
653
	if (!kn)
654 655
		return ERR_PTR(-ENOMEM);

656 657
	kn->dir.root = parent->dir.root;
	kn->ns = ns;
658
	kn->priv = priv;
659 660

	/* link in */
661 662 663
	kernfs_addrm_start(&acxt);
	rc = kernfs_add_one(&acxt, kn, parent);
	kernfs_addrm_finish(&acxt);
664 665

	if (!rc)
666
		return kn;
667

668
	kernfs_put(kn);
669 670 671
	return ERR_PTR(rc);
}

672 673 674
static struct dentry *kernfs_iop_lookup(struct inode *dir,
					struct dentry *dentry,
					unsigned int flags)
675
{
T
Tejun Heo 已提交
676
	struct dentry *ret;
677 678
	struct kernfs_node *parent = dentry->d_parent->d_fsdata;
	struct kernfs_node *kn;
679 680 681
	struct inode *inode;
	const void *ns = NULL;

682
	mutex_lock(&kernfs_mutex);
683

684
	if (kernfs_ns_enabled(parent))
685
		ns = kernfs_info(dir->i_sb)->ns;
686

687
	kn = kernfs_find_ns(parent, dentry->d_name.name, ns);
688 689

	/* no such entry */
690
	if (!kn) {
T
Tejun Heo 已提交
691
		ret = NULL;
692 693
		goto out_unlock;
	}
694 695
	kernfs_get(kn);
	dentry->d_fsdata = kn;
696 697

	/* attach dentry and inode */
698
	inode = kernfs_get_inode(dir->i_sb, kn);
699 700 701 702 703 704 705 706
	if (!inode) {
		ret = ERR_PTR(-ENOMEM);
		goto out_unlock;
	}

	/* instantiate and hash dentry */
	ret = d_materialise_unique(dentry, inode);
 out_unlock:
707
	mutex_unlock(&kernfs_mutex);
708 709 710
	return ret;
}

T
Tejun Heo 已提交
711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746
/*
 * ->mkdir: forward to the hierarchy's optional mkdir directory operation.
 * Returns -EPERM when the hierarchy provides none.
 */
static int kernfs_iop_mkdir(struct inode *dir, struct dentry *dentry,
			    umode_t mode)
{
	struct kernfs_node *parent = dir->i_private;
	struct kernfs_dir_ops *ops = kernfs_root(parent)->dir_ops;

	if (ops && ops->mkdir)
		return ops->mkdir(parent, dentry->d_name.name, mode);

	return -EPERM;
}

/*
 * ->rmdir: forward to the hierarchy's optional rmdir directory operation.
 * Returns -EPERM when the hierarchy provides none.
 */
static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry)
{
	struct kernfs_node *kn = dentry->d_fsdata;
	struct kernfs_dir_ops *ops = kernfs_root(kn)->dir_ops;

	if (ops && ops->rmdir)
		return ops->rmdir(kn);

	return -EPERM;
}

/*
 * ->rename: forward to the hierarchy's optional rename directory operation.
 * Returns -EPERM when the hierarchy provides none.
 */
static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry,
			     struct inode *new_dir, struct dentry *new_dentry)
{
	struct kernfs_node *kn = old_dentry->d_fsdata;
	struct kernfs_node *new_parent = new_dir->i_private;
	struct kernfs_dir_ops *ops = kernfs_root(kn)->dir_ops;

	if (ops && ops->rename)
		return ops->rename(kn, new_parent, new_dentry->d_name.name);

	return -EPERM;
}

747
const struct inode_operations kernfs_dir_iops = {
748 749 750 751 752 753 754 755
	.lookup		= kernfs_iop_lookup,
	.permission	= kernfs_iop_permission,
	.setattr	= kernfs_iop_setattr,
	.getattr	= kernfs_iop_getattr,
	.setxattr	= kernfs_iop_setxattr,
	.removexattr	= kernfs_iop_removexattr,
	.getxattr	= kernfs_iop_getxattr,
	.listxattr	= kernfs_iop_listxattr,
T
Tejun Heo 已提交
756 757 758 759

	.mkdir		= kernfs_iop_mkdir,
	.rmdir		= kernfs_iop_rmdir,
	.rename		= kernfs_iop_rename,
760 761
};

762
static struct kernfs_node *kernfs_leftmost_descendant(struct kernfs_node *pos)
763
{
764
	struct kernfs_node *last;
765 766 767 768 769 770

	while (true) {
		struct rb_node *rbn;

		last = pos;

T
Tejun Heo 已提交
771
		if (kernfs_type(pos) != KERNFS_DIR)
772 773
			break;

774
		rbn = rb_first(&pos->dir.children);
775 776 777
		if (!rbn)
			break;

778
		pos = rb_to_kn(rbn);
779 780 781 782 783 784
	}

	return last;
}

/**
785
 * kernfs_next_descendant_post - find the next descendant for post-order walk
786
 * @pos: the current position (%NULL to initiate traversal)
787
 * @root: kernfs_node whose descendants to walk
788 789 790 791 792
 *
 * Find the next descendant to visit for post-order traversal of @root's
 * descendants.  @root is included in the iteration and the last node to be
 * visited.
 */
793 794
static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
						       struct kernfs_node *root)
795 796 797
{
	struct rb_node *rbn;

798
	lockdep_assert_held(&kernfs_mutex);
799 800 801

	/* if first iteration, visit leftmost descendant which may be root */
	if (!pos)
802
		return kernfs_leftmost_descendant(root);
803 804 805 806 807 808

	/* if we visited @root, we're done */
	if (pos == root)
		return NULL;

	/* if there's an unvisited sibling, visit its leftmost descendant */
809
	rbn = rb_next(&pos->rb);
810
	if (rbn)
811
		return kernfs_leftmost_descendant(rb_to_kn(rbn));
812 813

	/* no sibling left, visit parent */
814
	return pos->parent;
815 816
}

817
static void __kernfs_remove(struct kernfs_addrm_cxt *acxt,
818
			    struct kernfs_node *kn)
819
{
820
	struct kernfs_node *pos, *next;
821

822
	if (!kn)
823 824
		return;

825
	pr_debug("kernfs %s: removing\n", kn->name);
826 827 828 829

	next = NULL;
	do {
		pos = next;
830
		next = kernfs_next_descendant_post(pos, kn);
831
		if (pos)
832
			kernfs_remove_one(acxt, pos);
833 834 835 836
	} while (next);
}

/**
837 838
 * kernfs_remove - remove a kernfs_node recursively
 * @kn: the kernfs_node to remove
839
 *
840
 * Remove @kn along with all its subdirectories and files.
841
 */
842
void kernfs_remove(struct kernfs_node *kn)
843
{
844
	struct kernfs_addrm_cxt acxt;
845

846
	kernfs_addrm_start(&acxt);
847
	__kernfs_remove(&acxt, kn);
848
	kernfs_addrm_finish(&acxt);
849 850 851
}

/**
852 853 854 855
 * kernfs_remove_by_name_ns - find a kernfs_node by name and remove it
 * @parent: parent of the target
 * @name: name of the kernfs_node to remove
 * @ns: namespace tag of the kernfs_node to remove
856
 *
857 858
 * Look for the kernfs_node with @name and @ns under @parent and remove it.
 * Returns 0 on success, -ENOENT if such entry doesn't exist.
859
 */
860
int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
861 862
			     const void *ns)
{
863
	struct kernfs_addrm_cxt acxt;
864
	struct kernfs_node *kn;
865

866
	if (!parent) {
867
		WARN(1, KERN_WARNING "kernfs: can not remove '%s', no directory\n",
868 869 870 871
			name);
		return -ENOENT;
	}

872
	kernfs_addrm_start(&acxt);
873

874 875 876
	kn = kernfs_find_ns(parent, name, ns);
	if (kn)
		__kernfs_remove(&acxt, kn);
877

878
	kernfs_addrm_finish(&acxt);
879

880
	if (kn)
881 882 883 884 885 886 887
		return 0;
	else
		return -ENOENT;
}

/**
 * kernfs_rename_ns - move and rename a kernfs_node
888
 * @kn: target node
889 890 891 892
 * @new_parent: new parent to put @sd under
 * @new_name: new name
 * @new_ns: new namespace tag
 */
893
int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
894 895 896 897
		     const char *new_name, const void *new_ns)
{
	int error;

898
	mutex_lock(&kernfs_mutex);
899

900 901 902 903
	error = -ENOENT;
	if ((kn->flags | new_parent->flags) & KERNFS_REMOVED)
		goto out;

904
	error = 0;
905 906
	if ((kn->parent == new_parent) && (kn->ns == new_ns) &&
	    (strcmp(kn->name, new_name) == 0))
907 908 909 910 911 912
		goto out;	/* nothing to rename */

	error = -EEXIST;
	if (kernfs_find_ns(new_parent, new_name, new_ns))
		goto out;

913
	/* rename kernfs_node */
914
	if (strcmp(kn->name, new_name) != 0) {
915 916 917 918 919
		error = -ENOMEM;
		new_name = kstrdup(new_name, GFP_KERNEL);
		if (!new_name)
			goto out;

920 921 922 923 924
		if (kn->flags & KERNFS_STATIC_NAME)
			kn->flags &= ~KERNFS_STATIC_NAME;
		else
			kfree(kn->name);

925
		kn->name = new_name;
926 927 928 929 930
	}

	/*
	 * Move to the appropriate place in the appropriate directories rbtree.
	 */
931
	kernfs_unlink_sibling(kn);
932
	kernfs_get(new_parent);
933 934
	kernfs_put(kn->parent);
	kn->ns = new_ns;
935
	kn->hash = kernfs_name_hash(kn->name, kn->ns);
936
	kn->parent = new_parent;
937
	kernfs_link_sibling(kn);
938 939 940

	error = 0;
 out:
941
	mutex_unlock(&kernfs_mutex);
942 943 944 945
	return error;
}

/* Relationship between s_mode and the DT_xxx types */
946
static inline unsigned char dt_type(struct kernfs_node *kn)
947
{
948
	return (kn->mode >> 12) & 15;
949 950
}

951
static int kernfs_dir_fop_release(struct inode *inode, struct file *filp)
952 953 954 955 956
{
	kernfs_put(filp->private_data);
	return 0;
}

957
/*
 * Work out the node readdir should look at for position @hash in @parent.
 *
 * @pos is the cursor left by the previous call (may be NULL); its reference
 * is dropped here.  It is reused only if it has not been removed, is still
 * a child of @parent and still has hash @hash.  Otherwise, for positions
 * strictly between 1 and INT_MAX, the children rbtree is searched by hash;
 * without an exact match the search ends on the last node it visited.
 * Finally entries whose namespace tag differs from @ns are skipped.
 *
 * Returns the resulting node or NULL; no reference is taken on it.
 */
static struct kernfs_node *kernfs_dir_pos(const void *ns,
	struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos)
{
	struct rb_node *node;

	if (pos) {
		/* evaluate before the put below */
		int valid = !(pos->flags & KERNFS_REMOVED) &&
			pos->parent == parent && hash == pos->hash;

		kernfs_put(pos);
		if (!valid)
			pos = NULL;
	}

	if (!pos && (hash > 1) && (hash < INT_MAX)) {
		node = parent->dir.children.rb_node;
		while (node) {
			pos = rb_to_kn(node);

			if (hash == pos->hash)
				break;
			node = hash < pos->hash ? node->rb_left
						: node->rb_right;
		}
	}

	/* Skip over entries in the wrong namespace */
	while (pos && pos->ns != ns) {
		node = rb_next(&pos->rb);
		pos = node ? rb_to_kn(node) : NULL;
	}

	return pos;
}

991
/*
 * Advance the readdir cursor: re-resolve the current position through
 * kernfs_dir_pos() and then step to the next entry in rbtree order whose
 * namespace tag equals @ns.  Returns NULL when there is no such entry.
 */
static struct kernfs_node *kernfs_dir_next_pos(const void *ns,
	struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos)
{
	struct rb_node *node;

	pos = kernfs_dir_pos(ns, parent, ino, pos);
	if (!pos)
		return NULL;

	do {
		node = rb_next(&pos->rb);
		pos = node ? rb_to_kn(node) : NULL;
	} while (pos && pos->ns != ns);

	return pos;
}

1006
static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
1007 1008
{
	struct dentry *dentry = file->f_path.dentry;
1009 1010
	struct kernfs_node *parent = dentry->d_fsdata;
	struct kernfs_node *pos = file->private_data;
1011 1012 1013 1014
	const void *ns = NULL;

	if (!dir_emit_dots(file, ctx))
		return 0;
1015
	mutex_lock(&kernfs_mutex);
1016

1017
	if (kernfs_ns_enabled(parent))
1018
		ns = kernfs_info(dentry->d_sb)->ns;
1019

1020
	for (pos = kernfs_dir_pos(ns, parent, ctx->pos, pos);
1021
	     pos;
1022
	     pos = kernfs_dir_next_pos(ns, parent, ctx->pos, pos)) {
1023
		const char *name = pos->name;
1024 1025
		unsigned int type = dt_type(pos);
		int len = strlen(name);
1026
		ino_t ino = pos->ino;
1027

1028
		ctx->pos = pos->hash;
1029 1030 1031
		file->private_data = pos;
		kernfs_get(pos);

1032
		mutex_unlock(&kernfs_mutex);
1033 1034
		if (!dir_emit(ctx, name, len, ino, type))
			return 0;
1035
		mutex_lock(&kernfs_mutex);
1036
	}
1037
	mutex_unlock(&kernfs_mutex);
1038 1039 1040 1041 1042
	file->private_data = NULL;
	ctx->pos = INT_MAX;
	return 0;
}

1043 1044
/*
 * ->llseek for directories: generic_file_llseek() run under the inode's
 * i_mutex.  Returns whatever generic_file_llseek() returns.
 */
static loff_t kernfs_dir_fop_llseek(struct file *file, loff_t offset,
				    int whence)
{
	struct inode *dir_inode = file_inode(file);
	loff_t new_pos;

	mutex_lock(&dir_inode->i_mutex);
	new_pos = generic_file_llseek(file, offset, whence);
	mutex_unlock(&dir_inode->i_mutex);

	return new_pos;
}

1056
const struct file_operations kernfs_dir_fops = {
1057
	.read		= generic_read_dir,
1058 1059 1060
	.iterate	= kernfs_fop_readdir,
	.release	= kernfs_dir_fop_release,
	.llseek		= kernfs_dir_fop_llseek,
1061
};