/*
 * fs/sysfs/dir.c - sysfs core and dir operation implementation
 *
 * Copyright (c) 2001-3 Patrick Mochel
 * Copyright (c) 2007 SUSE Linux Products GmbH
 * Copyright (c) 2007 Tejun Heo <teheo@suse.de>
 *
 * This file is released under the GPLv2.
 *
 * Please see Documentation/filesystems/sysfs.txt for more information.
 */

#undef DEBUG

#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/module.h>
#include <linux/kobject.h>
19
#include <linux/namei.h>
20
#include <linux/idr.h>
21
#include <linux/completion.h>
D
Dave Young 已提交
22
#include <linux/mutex.h>
23
#include <linux/slab.h>
24
#include <linux/security.h>
L
Linus Torvalds 已提交
25 26
#include "sysfs.h"

27
DEFINE_MUTEX(sysfs_mutex);
28
DEFINE_MUTEX(sysfs_rename_mutex);
R
Roel Kluin 已提交
29
DEFINE_SPINLOCK(sysfs_assoc_lock);
L
Linus Torvalds 已提交
30

R
Roel Kluin 已提交
31
static DEFINE_SPINLOCK(sysfs_ino_lock);
32 33
static DEFINE_IDA(sysfs_ino_ida);

34 35 36 37 38
/**
 *	sysfs_link_sibling - link sysfs_dirent into sibling list
 *	@sd: sysfs_dirent of interest
 *
 *	Link @sd into its sibling list which starts from
39
 *	sd->s_parent->s_dir.children.
40 41
 *
 *	Locking:
42
 *	mutex_lock(sysfs_mutex)
43
 */
44
static void sysfs_link_sibling(struct sysfs_dirent *sd)
45 46
{
	struct sysfs_dirent *parent_sd = sd->s_parent;
E
Eric W. Biederman 已提交
47
	struct sysfs_dirent **pos;
48 49

	BUG_ON(sd->s_sibling);
E
Eric W. Biederman 已提交
50 51 52

	/* Store directory entries in order by ino.  This allows
	 * readdir to properly restart without having to add a
53
	 * cursor into the s_dir.children list.
E
Eric W. Biederman 已提交
54
	 */
55
	for (pos = &parent_sd->s_dir.children; *pos; pos = &(*pos)->s_sibling) {
E
Eric W. Biederman 已提交
56 57 58 59 60
		if (sd->s_ino < (*pos)->s_ino)
			break;
	}
	sd->s_sibling = *pos;
	*pos = sd;
61 62 63 64 65 66 67
}

/**
 *	sysfs_unlink_sibling - unlink sysfs_dirent from sibling list
 *	@sd: sysfs_dirent of interest
 *
 *	Unlink @sd from its sibling list which starts from
68
 *	sd->s_parent->s_dir.children.
69 70
 *
 *	Locking:
71
 *	mutex_lock(sysfs_mutex)
72
 */
73
static void sysfs_unlink_sibling(struct sysfs_dirent *sd)
74 75 76
{
	struct sysfs_dirent **pos;

77 78
	for (pos = &sd->s_parent->s_dir.children; *pos;
	     pos = &(*pos)->s_sibling) {
79 80 81 82 83 84 85 86
		if (*pos == sd) {
			*pos = sd->s_sibling;
			sd->s_sibling = NULL;
			break;
		}
	}
}

T
Tejun Heo 已提交
87 88 89 90 91
/**
 *	sysfs_get_dentry - get dentry for the given sysfs_dirent
 *	@sd: sysfs_dirent of interest
 *
 *	Get dentry for @sd.  Dentry is looked up if currently not
T
Tejun Heo 已提交
92 93
 *	present.  This function descends from the root looking up
 *	dentry for each step.
T
Tejun Heo 已提交
94 95
 *
 *	LOCKING:
96
 *	mutex_lock(sysfs_rename_mutex)
T
Tejun Heo 已提交
97 98 99 100 101 102
 *
 *	RETURNS:
 *	Pointer to found dentry on success, ERR_PTR() value on error.
 */
struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
{
T
Tejun Heo 已提交
103
	struct dentry *dentry = dget(sysfs_sb->s_root);
T
Tejun Heo 已提交
104

T
Tejun Heo 已提交
105 106 107
	while (dentry->d_fsdata != sd) {
		struct sysfs_dirent *cur;
		struct dentry *parent;
T
Tejun Heo 已提交
108

T
Tejun Heo 已提交
109 110 111
		/* find the first ancestor which hasn't been looked up */
		cur = sd;
		while (cur->s_parent != dentry->d_fsdata)
T
Tejun Heo 已提交
112 113 114
			cur = cur->s_parent;

		/* look it up */
T
Tejun Heo 已提交
115 116
		parent = dentry;
		mutex_lock(&parent->d_inode->i_mutex);
117
		dentry = lookup_one_noperm(cur->s_name, parent);
T
Tejun Heo 已提交
118 119
		mutex_unlock(&parent->d_inode->i_mutex);
		dput(parent);
T
Tejun Heo 已提交
120

T
Tejun Heo 已提交
121 122
		if (IS_ERR(dentry))
			break;
T
Tejun Heo 已提交
123 124 125 126
	}
	return dentry;
}

127 128 129 130 131 132 133 134 135 136
/**
 *	sysfs_get_active - get an active reference to sysfs_dirent
 *	@sd: sysfs_dirent to get an active reference to
 *
 *	Get an active reference of @sd.  This function is noop if @sd
 *	is NULL.
 *
 *	RETURNS:
 *	Pointer to @sd on success, NULL on failure.
 */
137
static struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
138
{
139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
	if (unlikely(!sd))
		return NULL;

	while (1) {
		int v, t;

		v = atomic_read(&sd->s_active);
		if (unlikely(v < 0))
			return NULL;

		t = atomic_cmpxchg(&sd->s_active, v, v + 1);
		if (likely(t == v))
			return sd;
		if (t < 0)
			return NULL;

		cpu_relax();
156 157 158 159 160 161 162 163 164 165
	}
}

/**
 *	sysfs_put_active - put an active reference to sysfs_dirent
 *	@sd: sysfs_dirent to put an active reference to
 *
 *	Put an active reference to @sd.  This function is noop if @sd
 *	is NULL.
 */
166
static void sysfs_put_active(struct sysfs_dirent *sd)
167
{
168 169 170 171 172 173 174 175 176 177 178
	struct completion *cmpl;
	int v;

	if (unlikely(!sd))
		return;

	v = atomic_dec_return(&sd->s_active);
	if (likely(v != SD_DEACTIVATED_BIAS))
		return;

	/* atomic_dec_return() is a mb(), we'll always see the updated
179
	 * sd->s_sibling.
180
	 */
181
	cmpl = (void *)sd->s_sibling;
182
	complete(cmpl);
183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227
}

/**
 *	sysfs_get_active_two - get active references to sysfs_dirent and parent
 *	@sd: sysfs_dirent of interest
 *
 *	Take an active reference on @sd's parent (when it has one) and
 *	then on @sd itself.  If the second step fails, the parent's
 *	reference is dropped again.  This function is noop if @sd is
 *	NULL.
 *
 *	RETURNS:
 *	Pointer to @sd on success, NULL on failure.
 */
struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd)
{
	if (!sd)
		return sd;

	/* parent first */
	if (sd->s_parent && unlikely(!sysfs_get_active(sd->s_parent)))
		return NULL;

	if (unlikely(!sysfs_get_active(sd))) {
		/* undo the parent reference; a NULL parent is a noop */
		sysfs_put_active(sd->s_parent);
		return NULL;
	}

	return sd;
}

/**
 *	sysfs_put_active_two - put active references to sysfs_dirent and parent
 *	@sd: sysfs_dirent of interest
 *
 *	Drop the active reference on @sd and then the one on its
 *	parent.  This function is noop if @sd is NULL.
 */
void sysfs_put_active_two(struct sysfs_dirent *sd)
{
	if (!sd)
		return;

	sysfs_put_active(sd);
	sysfs_put_active(sd->s_parent);
}

/**
 *	sysfs_deactivate - deactivate sysfs_dirent
 *	@sd: sysfs_dirent to deactivate
 *
228
 *	Deny new active references and drain existing ones.
229
 */
230
static void sysfs_deactivate(struct sysfs_dirent *sd)
231
{
232 233
	DECLARE_COMPLETION_ONSTACK(wait);
	int v;
234

235
	BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED));
236
	sd->s_sibling = (void *)&wait;
237 238

	/* atomic_add_return() is a mb(), put_active() will always see
239
	 * the updated sd->s_sibling.
240
	 */
241 242 243 244 245
	v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active);

	if (v != SD_DEACTIVATED_BIAS)
		wait_for_completion(&wait);

246
	sd->s_sibling = NULL;
247 248
}

T
Tejun Heo 已提交
249
/**
 *	sysfs_alloc_ino - allocate an inode number for a sysfs_dirent
 *	@pino: where to store the allocated number
 *
 *	Allocate an id from sysfs_ino_ida under sysfs_ino_lock, passing
 *	2 as the starting id to ida_get_new_above().  When the IDA
 *	reports -EAGAIN it is refilled with ida_pre_get() outside the
 *	spinlock and the allocation is retried.
 *
 *	RETURNS:
 *	0 on success with *@pino set; a negative errno on failure, in
 *	which case *@pino is left untouched.
 */
static int sysfs_alloc_ino(ino_t *pino)
{
	int ino, rc;

 retry:
	spin_lock(&sysfs_ino_lock);
	rc = ida_get_new_above(&sysfs_ino_ida, 2, &ino);
	spin_unlock(&sysfs_ino_lock);

	if (rc == -EAGAIN) {
		if (ida_pre_get(&sysfs_ino_ida, GFP_KERNEL))
			goto retry;
		rc = -ENOMEM;
	}

	/* @ino holds a value only after a successful allocation; do not
	 * read the uninitialized local on the error paths.
	 */
	if (rc)
		return rc;

	*pino = ino;
	return 0;
}

/* Remove @ino from sysfs_ino_ida while holding sysfs_ino_lock. */
static void sysfs_free_ino(ino_t ino)
{
	spin_lock(&sysfs_ino_lock);
	ida_remove(&sysfs_ino_ida, ino);
	spin_unlock(&sysfs_ino_lock);
}

275 276
void release_sysfs_dirent(struct sysfs_dirent * sd)
{
T
Tejun Heo 已提交
277 278 279
	struct sysfs_dirent *parent_sd;

 repeat:
280 281 282
	/* Moving/renaming is always done while holding reference.
	 * sd->s_parent won't change beneath us.
	 */
T
Tejun Heo 已提交
283 284
	parent_sd = sd->s_parent;

285
	if (sysfs_type(sd) == SYSFS_KOBJ_LINK)
T
Tejun Heo 已提交
286
		sysfs_put(sd->s_symlink.target_sd);
287
	if (sysfs_type(sd) & SYSFS_COPY_NAME)
T
Tejun Heo 已提交
288
		kfree(sd->s_name);
289 290 291
	if (sd->s_iattr && sd->s_iattr->ia_secdata)
		security_release_secctx(sd->s_iattr->ia_secdata,
					sd->s_iattr->ia_secdata_len);
292
	kfree(sd->s_iattr);
293
	sysfs_free_ino(sd->s_ino);
294
	kmem_cache_free(sysfs_dir_cachep, sd);
T
Tejun Heo 已提交
295 296 297 298

	sd = parent_sd;
	if (sd && atomic_dec_and_test(&sd->s_count))
		goto repeat;
299 300
}

301
static void sysfs_dentry_iput(struct dentry *dentry, struct inode *inode)
L
Linus Torvalds 已提交
302 303 304
{
	struct sysfs_dirent * sd = dentry->d_fsdata;

E
Eric W. Biederman 已提交
305
	sysfs_put(sd);
L
Linus Torvalds 已提交
306 307 308
	iput(inode);
}

A
Al Viro 已提交
309
static const struct dentry_operations sysfs_dentry_ops = {
310
	.d_iput		= sysfs_dentry_iput,
L
Linus Torvalds 已提交
311 312
};

313
/*
 * Allocate and initialise a sysfs_dirent.
 *
 * @name: entry name; duplicated when @type has SYSFS_COPY_NAME set,
 *        otherwise the pointer is stored as is
 * @mode: value stored in s_mode
 * @type: value stored in s_flags
 *
 * The new dirent starts with s_count 1, s_active 0 and a freshly
 * allocated inode number.
 *
 * Returns the new dirent, or NULL if any allocation fails.
 */
struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
{
	struct sysfs_dirent *sd;
	char *copy = NULL;

	if (type & SYSFS_COPY_NAME) {
		copy = kstrdup(name, GFP_KERNEL);
		if (!copy)
			return NULL;
		name = copy;
	}

	sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL);
	if (!sd)
		goto free_name;

	if (sysfs_alloc_ino(&sd->s_ino))
		goto free_sd;

	atomic_set(&sd->s_count, 1);
	atomic_set(&sd->s_active, 0);

	sd->s_name = name;
	sd->s_mode = mode;
	sd->s_flags = type;

	return sd;

 free_sd:
	kmem_cache_free(sysfs_dir_cachep, sd);
 free_name:
	kfree(copy);	/* NULL when the name was not duplicated */
	return NULL;
}

347 348 349 350 351 352
/*
 * ilookup5() match callback: @arg is a sysfs_dirent; report whether
 * @inode carries that dirent's inode number.
 */
static int sysfs_ilookup_test(struct inode *inode, void *arg)
{
	struct sysfs_dirent *wanted = arg;

	return inode->i_ino == wanted->s_ino;
}

353
/**
354 355 356
 *	sysfs_addrm_start - prepare for sysfs_dirent add/remove
 *	@acxt: pointer to sysfs_addrm_cxt to be used
 *	@parent_sd: parent sysfs_dirent
357
 *
358 359 360 361 362
 *	This function is called when the caller is about to add or
 *	remove sysfs_dirent under @parent_sd.  This function acquires
 *	sysfs_mutex, grabs inode for @parent_sd if available and lock
 *	i_mutex of it.  @acxt is used to keep and pass context to
 *	other addrm functions.
363 364
 *
 *	LOCKING:
365 366 367
 *	Kernel thread context (may sleep).  sysfs_mutex is locked on
 *	return.  i_mutex of parent inode is locked on return if
 *	available.
368
 */
369 370
void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
		       struct sysfs_dirent *parent_sd)
371
{
372
	struct inode *inode;
373

374 375 376
	memset(acxt, 0, sizeof(*acxt));
	acxt->parent_sd = parent_sd;

377 378 379
	/* Lookup parent inode.  inode initialization is protected by
	 * sysfs_mutex, so inode existence can be determined by
	 * looking up inode while holding sysfs_mutex.
380 381 382
	 */
	mutex_lock(&sysfs_mutex);

383 384 385 386
	inode = ilookup5(sysfs_sb, parent_sd->s_ino, sysfs_ilookup_test,
			 parent_sd);
	if (inode) {
		WARN_ON(inode->i_state & I_NEW);
387 388 389 390 391 392 393 394 395 396 397 398 399

		/* parent inode available */
		acxt->parent_inode = inode;

		/* sysfs_mutex is below i_mutex in lock hierarchy.
		 * First, trylock i_mutex.  If fails, unlock
		 * sysfs_mutex and lock them in order.
		 */
		if (!mutex_trylock(&inode->i_mutex)) {
			mutex_unlock(&sysfs_mutex);
			mutex_lock(&inode->i_mutex);
			mutex_lock(&sysfs_mutex);
		}
400
	}
401 402 403
}

/**
404
 *	__sysfs_add_one - add sysfs_dirent to parent without warning
405 406 407 408
 *	@acxt: addrm context to use
 *	@sd: sysfs_dirent to be added
 *
 *	Get @acxt->parent_sd and set sd->s_parent to it and increment
409 410
 *	nlink of parent inode if @sd is a directory and link into the
 *	children list of the parent.
411 412 413 414 415 416 417
 *
 *	This function should be called between calls to
 *	sysfs_addrm_start() and sysfs_addrm_finish() and should be
 *	passed the same @acxt as passed to sysfs_addrm_start().
 *
 *	LOCKING:
 *	Determined by sysfs_addrm_start().
418 419 420 421
 *
 *	RETURNS:
 *	0 on success, -EEXIST if entry with the given name already
 *	exists.
422
 */
423
int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
424
{
425
	if (sysfs_find_dirent(acxt->parent_sd, sd->s_name))
426 427
		return -EEXIST;

428 429 430 431 432 433
	sd->s_parent = sysfs_get(acxt->parent_sd);

	if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
		inc_nlink(acxt->parent_inode);

	acxt->cnt++;
434 435

	sysfs_link_sibling(sd);
436 437

	return 0;
438 439
}

440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459
/**
 *	sysfs_pathname - return full path to sysfs dirent
 *	@sd: sysfs_dirent whose path we want
 *	@path: caller allocated buffer
 *
 *	Gives the name "/" to the sysfs_root entry; any path returned
 *	is relative to wherever sysfs is mounted.
 *
 *	XXX: does no error checking on @path size
 */
static char *sysfs_pathname(struct sysfs_dirent *sd, char *path)
{
	if (sd->s_parent) {
		sysfs_pathname(sd->s_parent, path);
		strcat(path, "/");
	}
	strcat(path, sd->s_name);
	return path;
}

460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484
/**
 *	sysfs_add_one - add sysfs_dirent to parent
 *	@acxt: addrm context to use
 *	@sd: sysfs_dirent to be added
 *
 *	Get @acxt->parent_sd and set sd->s_parent to it and increment
 *	nlink of parent inode if @sd is a directory and link into the
 *	children list of the parent.
 *
 *	This function should be called between calls to
 *	sysfs_addrm_start() and sysfs_addrm_finish() and should be
 *	passed the same @acxt as passed to sysfs_addrm_start().
 *
 *	LOCKING:
 *	Determined by sysfs_addrm_start().
 *
 *	RETURNS:
 *	0 on success, -EEXIST if entry with the given name already
 *	exists.
 */
int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
{
	int ret;

	ret = __sysfs_add_one(acxt, sd);
485 486 487 488 489 490 491 492 493 494
	if (ret == -EEXIST) {
		char *path = kzalloc(PATH_MAX, GFP_KERNEL);
		WARN(1, KERN_WARNING
		     "sysfs: cannot create duplicate filename '%s'\n",
		     (path == NULL) ? sd->s_name :
		     strcat(strcat(sysfs_pathname(acxt->parent_sd, path), "/"),
		            sd->s_name));
		kfree(path);
	}

495 496 497
	return ret;
}

498 499 500
/**
 *	sysfs_remove_one - remove sysfs_dirent from parent
 *	@acxt: addrm context to use
501
 *	@sd: sysfs_dirent to be removed
502 503
 *
 *	Mark @sd removed and drop nlink of parent inode if @sd is a
504
 *	directory.  @sd is unlinked from the children list.
505 506 507 508 509 510 511 512 513 514
 *
 *	This function should be called between calls to
 *	sysfs_addrm_start() and sysfs_addrm_finish() and should be
 *	passed the same @acxt as passed to sysfs_addrm_start().
 *
 *	LOCKING:
 *	Determined by sysfs_addrm_start().
 */
void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
{
515 516 517
	BUG_ON(sd->s_flags & SYSFS_FLAG_REMOVED);

	sysfs_unlink_sibling(sd);
518 519 520 521 522 523 524 525 526 527 528

	sd->s_flags |= SYSFS_FLAG_REMOVED;
	sd->s_sibling = acxt->removed;
	acxt->removed = sd;

	if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
		drop_nlink(acxt->parent_inode);

	acxt->cnt++;
}

529 530 531 532 533 534 535 536 537 538 539
/**
 *	sysfs_drop_dentry - drop dentry for the specified sysfs_dirent
 *	@sd: target sysfs_dirent
 *
 *	Drop dentry for @sd.  @sd must have been unlinked from its
 *	parent on entry to this function such that it can't be looked
 *	up anymore.
 */
static void sysfs_drop_dentry(struct sysfs_dirent *sd)
{
	struct inode *inode;
540 541 542 543 544
	struct dentry *dentry;

	inode = ilookup(sysfs_sb, sd->s_ino);
	if (!inode)
		return;
545

546 547 548 549 550 551
	/* Drop any existing dentries associated with sd.
	 *
	 * For the dentry to be properly freed we need to grab a
	 * reference to the dentry under the dcache lock,  unhash it,
	 * and then put it.  The playing with the dentry count allows
	 * dput to immediately free the dentry  if it is not in use.
552
	 */
553
repeat:
554
	spin_lock(&dcache_lock);
555 556 557 558
	list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
		if (d_unhashed(dentry))
			continue;
		dget_locked(dentry);
559 560 561
		spin_lock(&dentry->d_lock);
		__d_drop(dentry);
		spin_unlock(&dentry->d_lock);
562 563 564
		spin_unlock(&dcache_lock);
		dput(dentry);
		goto repeat;
565 566 567 568
	}
	spin_unlock(&dcache_lock);

	/* adjust nlink and update timestamp */
569
	mutex_lock(&inode->i_mutex);
570

571 572 573
	inode->i_ctime = CURRENT_TIME;
	drop_nlink(inode);
	if (sysfs_type(sd) == SYSFS_DIR)
574 575
		drop_nlink(inode);

576 577 578
	mutex_unlock(&inode->i_mutex);

	iput(inode);
579 580
}

581 582 583 584 585 586 587 588 589 590 591
/**
 *	sysfs_addrm_finish - finish up sysfs_dirent add/remove
 *	@acxt: addrm context to finish up
 *
 *	Finish up sysfs_dirent add/remove.  Resources acquired by
 *	sysfs_addrm_start() are released and removed sysfs_dirents are
 *	cleaned up.  Timestamps on the parent inode are updated.
 *
 *	LOCKING:
 *	All mutexes acquired by sysfs_addrm_start() are released.
 */
592
void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615
{
	/* release resources acquired by sysfs_addrm_start() */
	mutex_unlock(&sysfs_mutex);
	if (acxt->parent_inode) {
		struct inode *inode = acxt->parent_inode;

		/* if added/removed, update timestamps on the parent */
		if (acxt->cnt)
			inode->i_ctime = inode->i_mtime = CURRENT_TIME;

		mutex_unlock(&inode->i_mutex);
		iput(inode);
	}

	/* kill removed sysfs_dirents */
	while (acxt->removed) {
		struct sysfs_dirent *sd = acxt->removed;

		acxt->removed = sd->s_sibling;
		sd->s_sibling = NULL;

		sysfs_drop_dentry(sd);
		sysfs_deactivate(sd);
616
		unmap_bin_file(sd);
617
		sysfs_put(sd);
T
Tejun Heo 已提交
618
	}
619 620
}

621 622 623 624 625 626
/**
 *	sysfs_find_dirent - find sysfs_dirent with the given name
 *	@parent_sd: sysfs_dirent to search under
 *	@name: name to look for
 *
 *	Look for sysfs_dirent with name @name under @parent_sd.
627
 *
628
 *	LOCKING:
629
 *	mutex_lock(sysfs_mutex)
630
 *
631 632
 *	RETURNS:
 *	Pointer to sysfs_dirent if found, NULL if not.
633
 */
634 635
struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
				       const unsigned char *name)
636
{
637 638
	struct sysfs_dirent *sd;

639
	for (sd = parent_sd->s_dir.children; sd; sd = sd->s_sibling)
E
Eric W. Biederman 已提交
640
		if (!strcmp(sd->s_name, name))
641 642 643
			return sd;
	return NULL;
}
644

645 646 647 648 649 650 651 652 653
/**
 *	sysfs_get_dirent - find and get sysfs_dirent with the given name
 *	@parent_sd: sysfs_dirent to search under
 *	@name: name to look for
 *
 *	Look for sysfs_dirent with name @name under @parent_sd and get
 *	it if found.
 *
 *	LOCKING:
654
 *	Kernel thread context (may sleep).  Grabs sysfs_mutex.
655 656 657 658 659 660 661 662 663
 *
 *	RETURNS:
 *	Pointer to sysfs_dirent if found, NULL if not.
 */
struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
				      const unsigned char *name)
{
	struct sysfs_dirent *sd;

664
	mutex_lock(&sysfs_mutex);
665 666
	sd = sysfs_find_dirent(parent_sd, name);
	sysfs_get(sd);
667
	mutex_unlock(&sysfs_mutex);
668 669

	return sd;
670
}
671
EXPORT_SYMBOL_GPL(sysfs_get_dirent);
672

673 674
static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
		      const char *name, struct sysfs_dirent **p_sd)
L
Linus Torvalds 已提交
675 676
{
	umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
677
	struct sysfs_addrm_cxt acxt;
678
	struct sysfs_dirent *sd;
679
	int rc;
L
Linus Torvalds 已提交
680

681
	/* allocate */
682
	sd = sysfs_new_dirent(name, mode, SYSFS_DIR);
683
	if (!sd)
684
		return -ENOMEM;
T
Tejun Heo 已提交
685
	sd->s_dir.kobj = kobj;
686

687
	/* link in */
688
	sysfs_addrm_start(&acxt, parent_sd);
689 690
	rc = sysfs_add_one(&acxt, sd);
	sysfs_addrm_finish(&acxt);
691

692 693 694
	if (rc == 0)
		*p_sd = sd;
	else
695
		sysfs_put(sd);
696

697
	return rc;
L
Linus Torvalds 已提交
698 699
}

700 701
int sysfs_create_subdir(struct kobject *kobj, const char *name,
			struct sysfs_dirent **p_sd)
L
Linus Torvalds 已提交
702
{
703
	return create_dir(kobj, kobj->sd, name, p_sd);
L
Linus Torvalds 已提交
704 705 706 707 708 709
}

/**
 *	sysfs_create_dir - create a directory for an object.
 *	@kobj:		object we're creating directory for. 
 */
710
int sysfs_create_dir(struct kobject * kobj)
L
Linus Torvalds 已提交
711
{
712
	struct sysfs_dirent *parent_sd, *sd;
L
Linus Torvalds 已提交
713 714 715 716
	int error = 0;

	BUG_ON(!kobj);

717
	if (kobj->parent)
718
		parent_sd = kobj->parent->sd;
L
Linus Torvalds 已提交
719
	else
E
Eric W. Biederman 已提交
720
		parent_sd = &sysfs_root;
L
Linus Torvalds 已提交
721

722
	error = create_dir(kobj, parent_sd, kobject_name(kobj), &sd);
L
Linus Torvalds 已提交
723
	if (!error)
724
		kobj->sd = sd;
L
Linus Torvalds 已提交
725 726 727 728 729 730
	return error;
}

static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
				struct nameidata *nd)
{
731
	struct dentry *ret = NULL;
732 733
	struct sysfs_dirent *parent_sd = dentry->d_parent->d_fsdata;
	struct sysfs_dirent *sd;
734
	struct inode *inode;
L
Linus Torvalds 已提交
735

736 737
	mutex_lock(&sysfs_mutex);

738
	sd = sysfs_find_dirent(parent_sd, dentry->d_name.name);
L
Linus Torvalds 已提交
739

740
	/* no such entry */
741 742
	if (!sd) {
		ret = ERR_PTR(-ENOENT);
743
		goto out_unlock;
744
	}
745 746

	/* attach dentry and inode */
747
	inode = sysfs_get_inode(sd);
748 749 750 751
	if (!inode) {
		ret = ERR_PTR(-ENOMEM);
		goto out_unlock;
	}
752

T
Tejun Heo 已提交
753 754 755
	/* instantiate and hash dentry */
	dentry->d_op = &sysfs_dentry_ops;
	dentry->d_fsdata = sysfs_get(sd);
756
	d_instantiate(dentry, inode);
T
Tejun Heo 已提交
757
	d_rehash(dentry);
758

759
 out_unlock:
760
	mutex_unlock(&sysfs_mutex);
761
	return ret;
L
Linus Torvalds 已提交
762 763
}

764
const struct inode_operations sysfs_dir_inode_operations = {
L
Linus Torvalds 已提交
765
	.lookup		= sysfs_lookup,
766
	.setattr	= sysfs_setattr,
767
	.setxattr	= sysfs_setxattr,
L
Linus Torvalds 已提交
768 769
};

770
static void remove_dir(struct sysfs_dirent *sd)
L
Linus Torvalds 已提交
771
{
772
	struct sysfs_addrm_cxt acxt;
L
Linus Torvalds 已提交
773

774 775 776
	sysfs_addrm_start(&acxt, sd->s_parent);
	sysfs_remove_one(&acxt, sd);
	sysfs_addrm_finish(&acxt);
L
Linus Torvalds 已提交
777 778
}

779
/* Remove the subdirectory dirent @sd; thin wrapper around remove_dir(). */
void sysfs_remove_subdir(struct sysfs_dirent *sd)
{
	remove_dir(sd);
}


785
static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd)
L
Linus Torvalds 已提交
786
{
787
	struct sysfs_addrm_cxt acxt;
788
	struct sysfs_dirent **pos;
L
Linus Torvalds 已提交
789

790
	if (!dir_sd)
L
Linus Torvalds 已提交
791 792
		return;

793
	pr_debug("sysfs %s: removing dir\n", dir_sd->s_name);
794
	sysfs_addrm_start(&acxt, dir_sd);
795
	pos = &dir_sd->s_dir.children;
796 797 798
	while (*pos) {
		struct sysfs_dirent *sd = *pos;

E
Eric W. Biederman 已提交
799
		if (sysfs_type(sd) != SYSFS_DIR)
800
			sysfs_remove_one(&acxt, sd);
801
		else
802
			pos = &(*pos)->s_sibling;
L
Linus Torvalds 已提交
803
	}
804
	sysfs_addrm_finish(&acxt);
805

806
	remove_dir(dir_sd);
807 808 809 810 811 812 813 814 815 816 817 818 819
}

/**
 *	sysfs_remove_dir - remove an object's directory.
 *	@kobj:	object.
 *
 *	The only thing special about this is that we remove any files in
 *	the directory before we remove the directory, and we've inlined
 *	what used to be sysfs_rmdir() below, instead of calling separately.
 */

void sysfs_remove_dir(struct kobject * kobj)
{
820
	struct sysfs_dirent *sd = kobj->sd;
821

T
Tejun Heo 已提交
822
	spin_lock(&sysfs_assoc_lock);
823
	kobj->sd = NULL;
T
Tejun Heo 已提交
824
	spin_unlock(&sysfs_assoc_lock);
825

826
	__sysfs_remove_dir(sd);
L
Linus Torvalds 已提交
827 828
}

829
int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
L
Linus Torvalds 已提交
830
{
831
	struct sysfs_dirent *sd = kobj->sd;
832
	struct dentry *parent = NULL;
833 834
	struct dentry *old_dentry = NULL, *new_dentry = NULL;
	const char *dup_name = NULL;
835
	int error;
L
Linus Torvalds 已提交
836

837 838
	mutex_lock(&sysfs_rename_mutex);

839 840 841 842
	error = 0;
	if (strcmp(sd->s_name, new_name) == 0)
		goto out;	/* nothing to rename */

T
Tejun Heo 已提交
843
	/* get the original dentry */
844 845 846
	old_dentry = sysfs_get_dentry(sd);
	if (IS_ERR(old_dentry)) {
		error = PTR_ERR(old_dentry);
847
		old_dentry = NULL;
848
		goto out;
849 850
	}

T
Tejun Heo 已提交
851
	parent = old_dentry->d_parent;
L
Linus Torvalds 已提交
852

853 854
	/* lock parent and get dentry for new name */
	mutex_lock(&parent->d_inode->i_mutex);
855
	mutex_lock(&sysfs_mutex);
L
Linus Torvalds 已提交
856

857 858
	error = -EEXIST;
	if (sysfs_find_dirent(sd->s_parent, new_name))
859
		goto out_unlock;
860

861 862 863
	error = -ENOMEM;
	new_dentry = d_alloc_name(parent, new_name);
	if (!new_dentry)
864
		goto out_unlock;
865

866
	/* rename sysfs_dirent */
T
Tejun Heo 已提交
867 868 869
	error = -ENOMEM;
	new_name = dup_name = kstrdup(new_name, GFP_KERNEL);
	if (!new_name)
870
		goto out_unlock;
T
Tejun Heo 已提交
871

872
	dup_name = sd->s_name;
T
Tejun Heo 已提交
873 874
	sd->s_name = new_name;

T
Tejun Heo 已提交
875
	/* rename */
876
	d_add(new_dentry, NULL);
E
Eric W. Biederman 已提交
877
	d_move(old_dentry, new_dentry);
878 879 880

	error = 0;
 out_unlock:
881
	mutex_unlock(&sysfs_mutex);
882
	mutex_unlock(&parent->d_inode->i_mutex);
883 884 885
	kfree(dup_name);
	dput(old_dentry);
	dput(new_dentry);
886
 out:
887
	mutex_unlock(&sysfs_rename_mutex);
L
Linus Torvalds 已提交
888 889 890
	return error;
}

891
int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
892
{
893 894 895 896
	struct sysfs_dirent *sd = kobj->sd;
	struct sysfs_dirent *new_parent_sd;
	struct dentry *old_parent, *new_parent = NULL;
	struct dentry *old_dentry = NULL, *new_dentry = NULL;
897 898
	int error;

899
	mutex_lock(&sysfs_rename_mutex);
900
	BUG_ON(!sd->s_parent);
901 902
	new_parent_sd = (new_parent_kobj && new_parent_kobj->sd) ?
		new_parent_kobj->sd : &sysfs_root;
903

904 905 906 907
	error = 0;
	if (sd->s_parent == new_parent_sd)
		goto out;	/* nothing to move */

908 909 910 911
	/* get dentries */
	old_dentry = sysfs_get_dentry(sd);
	if (IS_ERR(old_dentry)) {
		error = PTR_ERR(old_dentry);
912
		old_dentry = NULL;
913
		goto out;
914
	}
E
Eric W. Biederman 已提交
915
	old_parent = old_dentry->d_parent;
916 917 918 919

	new_parent = sysfs_get_dentry(new_parent_sd);
	if (IS_ERR(new_parent)) {
		error = PTR_ERR(new_parent);
920
		new_parent = NULL;
921
		goto out;
922
	}
923 924

again:
925 926 927
	mutex_lock(&old_parent->d_inode->i_mutex);
	if (!mutex_trylock(&new_parent->d_inode->i_mutex)) {
		mutex_unlock(&old_parent->d_inode->i_mutex);
928 929
		goto again;
	}
930
	mutex_lock(&sysfs_mutex);
931

932 933
	error = -EEXIST;
	if (sysfs_find_dirent(new_parent_sd, sd->s_name))
934
		goto out_unlock;
935 936 937 938 939 940 941

	error = -ENOMEM;
	new_dentry = d_alloc_name(new_parent, sd->s_name);
	if (!new_dentry)
		goto out_unlock;

	error = 0;
942
	d_add(new_dentry, NULL);
E
Eric W. Biederman 已提交
943
	d_move(old_dentry, new_dentry);
944 945

	/* Remove from old parent's list and insert into new parent's list. */
946
	sysfs_unlink_sibling(sd);
947
	sysfs_get(new_parent_sd);
948
	drop_nlink(old_parent->d_inode);
949 950
	sysfs_put(sd->s_parent);
	sd->s_parent = new_parent_sd;
951
	inc_nlink(new_parent->d_inode);
952
	sysfs_link_sibling(sd);
953

954
 out_unlock:
955
	mutex_unlock(&sysfs_mutex);
956 957
	mutex_unlock(&new_parent->d_inode->i_mutex);
	mutex_unlock(&old_parent->d_inode->i_mutex);
958
 out:
959 960 961
	dput(new_parent);
	dput(old_dentry);
	dput(new_dentry);
962
	mutex_unlock(&sysfs_rename_mutex);
963 964 965
	return error;
}

L
Linus Torvalds 已提交
966 967 968 969 970 971 972 973
/*
 * Relationship between s_mode and the DT_xxx types: the value passed
 * to filldir is the four bits of s_mode starting at bit 12.
 */
static inline unsigned char dt_type(struct sysfs_dirent *sd)
{
	return (sd->s_mode >> 12) & 0xf;
}

/*
 * ->readdir for sysfs directories.
 *
 * f_pos 0 and 1 stand for "." and "..".  After that f_pos holds the
 * inode number of the entry being reported, and INT_MAX once the whole
 * children list has been emitted.  Always returns 0.
 */
static int sysfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
	struct dentry *dentry = filp->f_path.dentry;
	struct sysfs_dirent *parent_sd = dentry->d_fsdata;
	struct sysfs_dirent *child;
	ino_t ino;

	if (filp->f_pos == 0) {
		ino = parent_sd->s_ino;
		if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) == 0)
			filp->f_pos++;
	}
	if (filp->f_pos == 1) {
		/* a dirent without a parent reports itself as ".." */
		ino = parent_sd->s_parent ? parent_sd->s_parent->s_ino
					  : parent_sd->s_ino;
		if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) == 0)
			filp->f_pos++;
	}
	if ((filp->f_pos <= 1) || (filp->f_pos >= INT_MAX))
		return 0;

	mutex_lock(&sysfs_mutex);

	/* Skip the entries we have already reported */
	child = parent_sd->s_dir.children;
	while (child && (filp->f_pos > child->s_ino))
		child = child->s_sibling;

	while (child) {
		const char *name = child->s_name;
		int len = strlen(name);

		/* record the position first so that an entry rejected
		 * by filldir is the one the next call starts from
		 */
		filp->f_pos = ino = child->s_ino;

		if (filldir(dirent, name, len, filp->f_pos, ino,
			    dt_type(child)) < 0)
			break;

		child = child->s_sibling;
	}
	if (!child)
		filp->f_pos = INT_MAX;	/* ran off the end of the list */

	mutex_unlock(&sysfs_mutex);
	return 0;
}

E
Eric W. Biederman 已提交
1019

1020
const struct file_operations sysfs_dir_operations = {
L
Linus Torvalds 已提交
1021 1022
	.read		= generic_read_dir,
	.readdir	= sysfs_readdir,
1023
	.llseek		= generic_file_llseek,
L
Linus Torvalds 已提交
1024
};