dir.c 21.8 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10
/*
 * dir.c - Operations for sysfs directories.
 */

#undef DEBUG

#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/module.h>
#include <linux/kobject.h>
11
#include <linux/namei.h>
12
#include <linux/idr.h>
13
#include <linux/completion.h>
D
Dave Young 已提交
14
#include <linux/mutex.h>
L
Linus Torvalds 已提交
15 16
#include "sysfs.h"

17
DEFINE_MUTEX(sysfs_mutex);
18
DEFINE_MUTEX(sysfs_rename_mutex);
T
Tejun Heo 已提交
19
spinlock_t sysfs_assoc_lock = SPIN_LOCK_UNLOCKED;
L
Linus Torvalds 已提交
20

21 22 23
static spinlock_t sysfs_ino_lock = SPIN_LOCK_UNLOCKED;
static DEFINE_IDA(sysfs_ino_ida);

24 25 26 27 28 29 30 31
/**
 *	sysfs_link_sibling - link sysfs_dirent into sibling list
 *	@sd: sysfs_dirent of interest
 *
 *	Link @sd into its sibling list which starts from
 *	sd->s_parent->s_children.
 *
 *	Locking:
32
 *	mutex_lock(sysfs_mutex)
33
 */
34
static void sysfs_link_sibling(struct sysfs_dirent *sd)
35 36
{
	struct sysfs_dirent *parent_sd = sd->s_parent;
E
Eric W. Biederman 已提交
37
	struct sysfs_dirent **pos;
38 39

	BUG_ON(sd->s_sibling);
E
Eric W. Biederman 已提交
40 41 42 43 44 45 46 47 48 49 50

	/* Store directory entries in order by ino.  This allows
	 * readdir to properly restart without having to add a
	 * cursor into the s_children list.
	 */
	for (pos = &parent_sd->s_children; *pos; pos = &(*pos)->s_sibling) {
		if (sd->s_ino < (*pos)->s_ino)
			break;
	}
	sd->s_sibling = *pos;
	*pos = sd;
51 52 53 54 55 56 57 58 59 60
}

/**
 *	sysfs_unlink_sibling - unlink sysfs_dirent from sibling list
 *	@sd: sysfs_dirent of interest
 *
 *	Unlink @sd from its sibling list which starts from
 *	sd->s_parent->s_children.
 *
 *	Locking:
61
 *	mutex_lock(sysfs_mutex)
62
 */
63
static void sysfs_unlink_sibling(struct sysfs_dirent *sd)
64 65 66 67 68 69 70 71 72 73 74 75
{
	struct sysfs_dirent **pos;

	for (pos = &sd->s_parent->s_children; *pos; pos = &(*pos)->s_sibling) {
		if (*pos == sd) {
			*pos = sd->s_sibling;
			sd->s_sibling = NULL;
			break;
		}
	}
}

T
Tejun Heo 已提交
76 77 78 79 80
/**
 *	sysfs_get_dentry - get dentry for the given sysfs_dirent
 *	@sd: sysfs_dirent of interest
 *
 *	Get dentry for @sd.  Dentry is looked up if currently not
T
Tejun Heo 已提交
81 82
 *	present.  This function descends from the root looking up
 *	dentry for each step.
T
Tejun Heo 已提交
83 84
 *
 *	LOCKING:
85
 *	mutex_lock(sysfs_rename_mutex)
T
Tejun Heo 已提交
86 87 88 89 90 91
 *
 *	RETURNS:
 *	Pointer to found dentry on success, ERR_PTR() value on error.
 */
struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
{
T
Tejun Heo 已提交
92
	struct dentry *dentry = dget(sysfs_sb->s_root);
T
Tejun Heo 已提交
93

T
Tejun Heo 已提交
94 95 96
	while (dentry->d_fsdata != sd) {
		struct sysfs_dirent *cur;
		struct dentry *parent;
T
Tejun Heo 已提交
97

T
Tejun Heo 已提交
98 99 100
		/* find the first ancestor which hasn't been looked up */
		cur = sd;
		while (cur->s_parent != dentry->d_fsdata)
T
Tejun Heo 已提交
101 102 103
			cur = cur->s_parent;

		/* look it up */
T
Tejun Heo 已提交
104 105 106
		parent = dentry;
		mutex_lock(&parent->d_inode->i_mutex);
		dentry = lookup_one_len_kern(cur->s_name, parent,
T
Tejun Heo 已提交
107
					     strlen(cur->s_name));
T
Tejun Heo 已提交
108 109
		mutex_unlock(&parent->d_inode->i_mutex);
		dput(parent);
T
Tejun Heo 已提交
110

T
Tejun Heo 已提交
111 112
		if (IS_ERR(dentry))
			break;
T
Tejun Heo 已提交
113 114 115 116
	}
	return dentry;
}

117 118 119 120 121 122 123 124 125 126 127 128
/**
 *	sysfs_get_active - get an active reference to sysfs_dirent
 *	@sd: sysfs_dirent to get an active reference to
 *
 *	Try to increment sd->s_active with a compare-and-exchange loop.
 *	The attempt is abandoned as soon as the counter is observed to
 *	be negative.  Passing a NULL @sd is allowed and yields NULL.
 *
 *	RETURNS:
 *	Pointer to @sd on success, NULL on failure.
 */
struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
{
	int seen, prev;

	if (unlikely(!sd))
		return NULL;

	for (;;) {
		seen = atomic_read(&sd->s_active);
		if (unlikely(seen < 0))
			return NULL;

		prev = atomic_cmpxchg(&sd->s_active, seen, seen + 1);
		if (likely(prev == seen))
			return sd;
		if (prev < 0)
			return NULL;

		/* lost a race with another updater; try again */
		cpu_relax();
	}
}

/**
 *	sysfs_put_active - put an active reference to sysfs_dirent
 *	@sd: sysfs_dirent to put an active reference to
 *
 *	Drop one active reference.  When the counter reaches
 *	SD_DEACTIVATED_BIAS, the completion whose address is stored in
 *	sd->s_sibling is completed.  This function is noop if @sd is
 *	NULL.
 */
void sysfs_put_active(struct sysfs_dirent *sd)
{
	if (unlikely(!sd))
		return;

	if (likely(atomic_dec_return(&sd->s_active) != SD_DEACTIVATED_BIAS))
		return;

	/* atomic_dec_return() is a mb(), we'll always see the updated
	 * sd->s_sibling, which holds a completion pointer at this point.
	 */
	complete((struct completion *)sd->s_sibling);
}

/**
 *	sysfs_get_active_two - get active references to sysfs_dirent and parent
 *	@sd: sysfs_dirent of interest
 *
 *	Take an active reference on the parent of @sd (when it has one)
 *	and then on @sd itself.  If the second step fails the parent's
 *	reference is dropped again.  This function is noop if @sd is
 *	NULL.
 *
 *	RETURNS:
 *	Pointer to @sd on success, NULL on failure.
 */
struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd)
{
	if (!sd)
		return NULL;

	if (sd->s_parent && unlikely(!sysfs_get_active(sd->s_parent)))
		return NULL;

	if (unlikely(!sysfs_get_active(sd))) {
		/* undo the parent reference taken above */
		sysfs_put_active(sd->s_parent);
		return NULL;
	}

	return sd;
}

/**
 *	sysfs_put_active_two - put active references to sysfs_dirent and parent
 *	@sd: sysfs_dirent of interest
 *
 *	Drop the active reference on @sd first and then the one on its
 *	parent.  This function is noop if @sd is NULL.
 */
void sysfs_put_active_two(struct sysfs_dirent *sd)
{
	if (!sd)
		return;

	sysfs_put_active(sd);
	sysfs_put_active(sd->s_parent);
}

/**
 *	sysfs_deactivate - deactivate sysfs_dirent
 *	@sd: sysfs_dirent to deactivate
 *
218
 *	Deny new active references and drain existing ones.
219
 */
220
static void sysfs_deactivate(struct sysfs_dirent *sd)
221
{
222 223
	DECLARE_COMPLETION_ONSTACK(wait);
	int v;
224

225
	BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED));
226
	sd->s_sibling = (void *)&wait;
227 228

	/* atomic_add_return() is a mb(), put_active() will always see
229
	 * the updated sd->s_sibling.
230
	 */
231 232 233 234 235
	v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active);

	if (v != SD_DEACTIVATED_BIAS)
		wait_for_completion(&wait);

236
	sd->s_sibling = NULL;
237 238
}

T
Tejun Heo 已提交
239
/**
 *	sysfs_alloc_ino - allocate an inode number from sysfs_ino_ida
 *	@pino: where to store the allocated number
 *
 *	Ask the ida for an id of at least 2 while holding
 *	sysfs_ino_lock.  When the ida reports -EAGAIN, preload it with
 *	ida_pre_get(GFP_KERNEL) and retry; a failed preload is reported
 *	as -ENOMEM.
 *
 *	RETURNS:
 *	0 on success, in which case *@pino holds the new number.  A
 *	negative error code otherwise; *@pino is left untouched then.
 */
static int sysfs_alloc_ino(ino_t *pino)
{
	int ino, rc;

 retry:
	spin_lock(&sysfs_ino_lock);
	rc = ida_get_new_above(&sysfs_ino_ida, 2, &ino);
	spin_unlock(&sysfs_ino_lock);

	if (rc == -EAGAIN) {
		if (ida_pre_get(&sysfs_ino_ida, GFP_KERNEL))
			goto retry;
		rc = -ENOMEM;
	}

	/* ino is only initialized when the allocation succeeded; do not
	 * copy an indeterminate value to the caller on failure.
	 */
	if (!rc)
		*pino = ino;
	return rc;
}

/* Return @ino to sysfs_ino_ida; the ida is updated under sysfs_ino_lock. */
static void sysfs_free_ino(ino_t ino)
{
	spin_lock(&sysfs_ino_lock);
	ida_remove(&sysfs_ino_ida, ino);
	spin_unlock(&sysfs_ino_lock);
}

265 266
void release_sysfs_dirent(struct sysfs_dirent * sd)
{
T
Tejun Heo 已提交
267 268 269
	struct sysfs_dirent *parent_sd;

 repeat:
270 271 272
	/* Moving/renaming is always done while holding reference.
	 * sd->s_parent won't change beneath us.
	 */
T
Tejun Heo 已提交
273 274
	parent_sd = sd->s_parent;

275
	if (sysfs_type(sd) == SYSFS_KOBJ_LINK)
276
		sysfs_put(sd->s_elem.symlink.target_sd);
277
	if (sysfs_type(sd) & SYSFS_COPY_NAME)
T
Tejun Heo 已提交
278
		kfree(sd->s_name);
279
	kfree(sd->s_iattr);
280
	sysfs_free_ino(sd->s_ino);
281
	kmem_cache_free(sysfs_dir_cachep, sd);
T
Tejun Heo 已提交
282 283 284 285

	sd = parent_sd;
	if (sd && atomic_dec_and_test(&sd->s_count))
		goto repeat;
286 287
}

L
Linus Torvalds 已提交
288 289 290 291
/*
 * ->d_iput hook: drop the sysfs_dirent reference stored in
 * dentry->d_fsdata, then release the inode.
 */
static void sysfs_d_iput(struct dentry * dentry, struct inode * inode)
{
	sysfs_put(dentry->d_fsdata);
	iput(inode);
}

/* Dentry operations installed on sysfs dentries by sysfs_attach_dentry(). */
static struct dentry_operations sysfs_dentry_ops = {
	.d_iput		= sysfs_d_iput,
};

300
/*
 * Allocate and initialize a sysfs_dirent.
 *
 * @name: entry name; duplicated with kstrdup() when @type has
 *        SYSFS_COPY_NAME set, otherwise the pointer is stored as is
 * @mode: stored in s_mode
 * @type: stored in s_flags
 *
 * The new entry starts with s_count 1, s_active 0 and s_event 1 and has
 * an inode number from sysfs_alloc_ino().
 *
 * Returns the new sysfs_dirent, or NULL if any allocation fails.
 */
struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
{
	struct sysfs_dirent *sd;
	char *dup_name = NULL;

	if (type & SYSFS_COPY_NAME) {
		dup_name = kstrdup(name, GFP_KERNEL);
		if (!dup_name)
			return NULL;
		name = dup_name;
	}

	sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL);
	if (!sd)
		goto free_name;

	if (sysfs_alloc_ino(&sd->s_ino))
		goto free_sd;

	atomic_set(&sd->s_count, 1);
	atomic_set(&sd->s_active, 0);
	atomic_set(&sd->s_event, 1);

	sd->s_name = name;
	sd->s_mode = mode;
	sd->s_flags = type;

	return sd;

 free_sd:
	kmem_cache_free(sysfs_dir_cachep, sd);
 free_name:
	kfree(dup_name);
	return NULL;
}

335 336 337 338 339 340 341 342
/**
 *	sysfs_attach_dentry - associate sysfs_dirent with dentry
 *	@sd: target sysfs_dirent
 *	@dentry: dentry to associate
 *
 *	LOCKING:
 *	mutex_lock(sysfs_mutex)
 */
343 344 345 346 347 348 349
static void sysfs_attach_dentry(struct sysfs_dirent *sd, struct dentry *dentry)
{
	/* install sysfs_d_iput() so the sysfs_dirent is put on dentry teardown */
	dentry->d_op = &sysfs_dentry_ops;
	/* the value returned by sysfs_get() is what sysfs_d_iput() later puts */
	dentry->d_fsdata = sysfs_get(sd);
	d_rehash(dentry);
}

350 351 352 353 354 355
/*
 * Match callback passed to ilookup5_nowait(): @arg is a sysfs_dirent
 * and an inode matches when its i_ino equals the dirent's s_ino.
 */
static int sysfs_ilookup_test(struct inode *inode, void *arg)
{
	return inode->i_ino == ((struct sysfs_dirent *)arg)->s_ino;
}

356
/**
357 358 359
 *	sysfs_addrm_start - prepare for sysfs_dirent add/remove
 *	@acxt: pointer to sysfs_addrm_cxt to be used
 *	@parent_sd: parent sysfs_dirent
360
 *
361 362 363 364 365
 *	This function is called when the caller is about to add or
 *	remove sysfs_dirent under @parent_sd.  This function acquires
 *	sysfs_mutex, grabs inode for @parent_sd if available and lock
 *	i_mutex of it.  @acxt is used to keep and pass context to
 *	other addrm functions.
366 367
 *
 *	LOCKING:
368 369 370
 *	Kernel thread context (may sleep).  sysfs_mutex is locked on
 *	return.  i_mutex of parent inode is locked on return if
 *	available.
371
 */
372 373
void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
		       struct sysfs_dirent *parent_sd)
374
{
375
	struct inode *inode;
376

377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423
	memset(acxt, 0, sizeof(*acxt));
	acxt->parent_sd = parent_sd;

	/* Lookup parent inode.  inode initialization and I_NEW
	 * clearing are protected by sysfs_mutex.  By grabbing it and
	 * looking up with _nowait variant, inode state can be
	 * determined reliably.
	 */
	mutex_lock(&sysfs_mutex);

	inode = ilookup5_nowait(sysfs_sb, parent_sd->s_ino, sysfs_ilookup_test,
				parent_sd);

	if (inode && !(inode->i_state & I_NEW)) {
		/* parent inode available */
		acxt->parent_inode = inode;

		/* sysfs_mutex is below i_mutex in lock hierarchy.
		 * First, trylock i_mutex.  If fails, unlock
		 * sysfs_mutex and lock them in order.
		 */
		if (!mutex_trylock(&inode->i_mutex)) {
			mutex_unlock(&sysfs_mutex);
			mutex_lock(&inode->i_mutex);
			mutex_lock(&sysfs_mutex);
		}
	} else
		iput(inode);
}

/**
 *	sysfs_add_one - add sysfs_dirent to parent
 *	@acxt: addrm context to use
 *	@sd: sysfs_dirent to be added
 *
 *	Get @acxt->parent_sd and set sd->s_parent to it and increment
 *	nlink of parent inode if @sd is a directory.  @sd is NOT
 *	linked into the children list of the parent.  The caller
 *	should invoke sysfs_link_sibling() after this function
 *	completes if @sd needs to be on the children list.
 *
 *	This function should be called between calls to
 *	sysfs_addrm_start() and sysfs_addrm_finish() and should be
 *	passed the same @acxt as passed to sysfs_addrm_start().
 *
 *	LOCKING:
 *	Determined by sysfs_addrm_start().
424 425 426 427
 *
 *	RETURNS:
 *	0 on success, -EEXIST if entry with the given name already
 *	exists.
428
 */
429
int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
430
{
431 432 433
	if (sysfs_find_dirent(acxt->parent_sd, sd->s_name))
		return -EEXIST;

434 435 436 437 438 439
	sd->s_parent = sysfs_get(acxt->parent_sd);

	if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
		inc_nlink(acxt->parent_inode);

	acxt->cnt++;
440 441

	sysfs_link_sibling(sd);
442 443

	return 0;
444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464
}

/**
 *	sysfs_remove_one - remove sysfs_dirent from parent
 *	@acxt: addrm context to use
 *	@sd: sysfs_dirent to be added
 *
 *	Mark @sd removed and drop nlink of parent inode if @sd is a
 *	directory.  @sd is NOT unlinked from the children list of the
 *	parent.  The caller is repsonsible for removing @sd from the
 *	children list before calling this function.
 *
 *	This function should be called between calls to
 *	sysfs_addrm_start() and sysfs_addrm_finish() and should be
 *	passed the same @acxt as passed to sysfs_addrm_start().
 *
 *	LOCKING:
 *	Determined by sysfs_addrm_start().
 */
void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
{
465 466 467
	BUG_ON(sd->s_flags & SYSFS_FLAG_REMOVED);

	sysfs_unlink_sibling(sd);
468 469 470 471 472 473 474 475 476 477 478

	sd->s_flags |= SYSFS_FLAG_REMOVED;
	sd->s_sibling = acxt->removed;
	acxt->removed = sd;

	if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
		drop_nlink(acxt->parent_inode);

	acxt->cnt++;
}

479 480 481 482 483 484 485 486 487 488 489
/**
 *	sysfs_drop_dentry - drop dentry for the specified sysfs_dirent
 *	@sd: target sysfs_dirent
 *
 *	Drop dentry for @sd.  @sd must have been unlinked from its
 *	parent on entry to this function such that it can't be looked
 *	up anymore.
 */
static void sysfs_drop_dentry(struct sysfs_dirent *sd)
{
	struct inode *inode;
490 491 492 493 494
	struct dentry *dentry;

	inode = ilookup(sysfs_sb, sd->s_ino);
	if (!inode)
		return;
495

496 497 498 499 500 501
	/* Drop any existing dentries associated with sd.
	 *
	 * For the dentry to be properly freed we need to grab a
	 * reference to the dentry under the dcache lock,  unhash it,
	 * and then put it.  The playing with the dentry count allows
	 * dput to immediately free the dentry  if it is not in use.
502
	 */
503
repeat:
504
	spin_lock(&dcache_lock);
505 506 507 508
	list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
		if (d_unhashed(dentry))
			continue;
		dget_locked(dentry);
509 510 511
		spin_lock(&dentry->d_lock);
		__d_drop(dentry);
		spin_unlock(&dentry->d_lock);
512 513 514
		spin_unlock(&dcache_lock);
		dput(dentry);
		goto repeat;
515 516 517 518
	}
	spin_unlock(&dcache_lock);

	/* adjust nlink and update timestamp */
519
	mutex_lock(&inode->i_mutex);
520

521 522 523
	inode->i_ctime = CURRENT_TIME;
	drop_nlink(inode);
	if (sysfs_type(sd) == SYSFS_DIR)
524 525
		drop_nlink(inode);

526 527 528
	mutex_unlock(&inode->i_mutex);

	iput(inode);
529 530
}

531 532 533 534 535 536 537 538 539 540 541
/**
 *	sysfs_addrm_finish - finish up sysfs_dirent add/remove
 *	@acxt: addrm context to finish up
 *
 *	Finish up sysfs_dirent add/remove.  Resources acquired by
 *	sysfs_addrm_start() are released and removed sysfs_dirents are
 *	cleaned up.  Timestamps on the parent inode are updated.
 *
 *	LOCKING:
 *	All mutexes acquired by sysfs_addrm_start() are released.
 */
542
void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566
{
	/* release resources acquired by sysfs_addrm_start() */
	mutex_unlock(&sysfs_mutex);
	if (acxt->parent_inode) {
		struct inode *inode = acxt->parent_inode;

		/* if added/removed, update timestamps on the parent */
		if (acxt->cnt)
			inode->i_ctime = inode->i_mtime = CURRENT_TIME;

		mutex_unlock(&inode->i_mutex);
		iput(inode);
	}

	/* kill removed sysfs_dirents */
	while (acxt->removed) {
		struct sysfs_dirent *sd = acxt->removed;

		acxt->removed = sd->s_sibling;
		sd->s_sibling = NULL;

		sysfs_drop_dentry(sd);
		sysfs_deactivate(sd);
		sysfs_put(sd);
T
Tejun Heo 已提交
567
	}
568 569
}

570 571 572 573 574 575
/**
 *	sysfs_find_dirent - find sysfs_dirent with the given name
 *	@parent_sd: sysfs_dirent to search under
 *	@name: name to look for
 *
 *	Look for sysfs_dirent with name @name under @parent_sd.
576
 *
577
 *	LOCKING:
578
 *	mutex_lock(sysfs_mutex)
579
 *
580 581
 *	RETURNS:
 *	Pointer to sysfs_dirent if found, NULL if not.
582
 */
583 584
struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
				       const unsigned char *name)
585
{
586 587 588
	struct sysfs_dirent *sd;

	for (sd = parent_sd->s_children; sd; sd = sd->s_sibling)
E
Eric W. Biederman 已提交
589
		if (!strcmp(sd->s_name, name))
590 591 592
			return sd;
	return NULL;
}
593

594 595 596 597 598 599 600 601 602
/**
 *	sysfs_get_dirent - find and get sysfs_dirent with the given name
 *	@parent_sd: sysfs_dirent to search under
 *	@name: name to look for
 *
 *	Look for sysfs_dirent with name @name under @parent_sd and get
 *	it if found.
 *
 *	LOCKING:
603
 *	Kernel thread context (may sleep).  Grabs sysfs_mutex.
604 605 606 607 608 609 610 611 612
 *
 *	RETURNS:
 *	Pointer to sysfs_dirent if found, NULL if not.
 */
struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
				      const unsigned char *name)
{
	struct sysfs_dirent *sd;

613
	mutex_lock(&sysfs_mutex);
614 615
	sd = sysfs_find_dirent(parent_sd, name);
	sysfs_get(sd);
616
	mutex_unlock(&sysfs_mutex);
617 618

	return sd;
619 620
}

621 622
static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
		      const char *name, struct sysfs_dirent **p_sd)
L
Linus Torvalds 已提交
623 624
{
	umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
625
	struct sysfs_addrm_cxt acxt;
626
	struct sysfs_dirent *sd;
627
	int rc;
L
Linus Torvalds 已提交
628

629
	/* allocate */
630
	sd = sysfs_new_dirent(name, mode, SYSFS_DIR);
631
	if (!sd)
632
		return -ENOMEM;
633
	sd->s_elem.dir.kobj = kobj;
634

635
	/* link in */
636
	sysfs_addrm_start(&acxt, parent_sd);
637 638
	rc = sysfs_add_one(&acxt, sd);
	sysfs_addrm_finish(&acxt);
639

640 641 642
	if (rc == 0)
		*p_sd = sd;
	else
643
		sysfs_put(sd);
644

645
	return rc;
L
Linus Torvalds 已提交
646 647
}

648 649
int sysfs_create_subdir(struct kobject *kobj, const char *name,
			struct sysfs_dirent **p_sd)
L
Linus Torvalds 已提交
650
{
651
	return create_dir(kobj, kobj->sd, name, p_sd);
L
Linus Torvalds 已提交
652 653 654 655 656 657
}

/**
 *	sysfs_create_dir - create a directory for an object.
 *	@kobj:		object we're creating directory for. 
 */
658
int sysfs_create_dir(struct kobject * kobj)
L
Linus Torvalds 已提交
659
{
660
	struct sysfs_dirent *parent_sd, *sd;
L
Linus Torvalds 已提交
661 662 663 664
	int error = 0;

	BUG_ON(!kobj);

665
	if (kobj->parent)
666
		parent_sd = kobj->parent->sd;
L
Linus Torvalds 已提交
667
	else
E
Eric W. Biederman 已提交
668
		parent_sd = &sysfs_root;
L
Linus Torvalds 已提交
669

670
	error = create_dir(kobj, parent_sd, kobject_name(kobj), &sd);
L
Linus Torvalds 已提交
671
	if (!error)
672
		kobj->sd = sd;
L
Linus Torvalds 已提交
673 674 675 676 677 678
	return error;
}

static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
				struct nameidata *nd)
{
679
	struct dentry *ret = NULL;
680 681
	struct sysfs_dirent *parent_sd = dentry->d_parent->d_fsdata;
	struct sysfs_dirent *sd;
682
	struct inode *inode;
L
Linus Torvalds 已提交
683

684 685
	mutex_lock(&sysfs_mutex);

686
	sd = sysfs_find_dirent(parent_sd, dentry->d_name.name);
L
Linus Torvalds 已提交
687

688
	/* no such entry */
689
	if (!sd)
690
		goto out_unlock;
691 692

	/* attach dentry and inode */
693
	inode = sysfs_get_inode(sd);
694 695 696 697
	if (!inode) {
		ret = ERR_PTR(-ENOMEM);
		goto out_unlock;
	}
698

699
	d_instantiate(dentry, inode);
700 701
	sysfs_attach_dentry(sd, dentry);

702
 out_unlock:
703
	mutex_unlock(&sysfs_mutex);
704
	return ret;
L
Linus Torvalds 已提交
705 706
}

707
const struct inode_operations sysfs_dir_inode_operations = {
L
Linus Torvalds 已提交
708
	.lookup		= sysfs_lookup,
709
	.setattr	= sysfs_setattr,
L
Linus Torvalds 已提交
710 711
};

712
static void remove_dir(struct sysfs_dirent *sd)
L
Linus Torvalds 已提交
713
{
714
	struct sysfs_addrm_cxt acxt;
L
Linus Torvalds 已提交
715

716 717 718
	sysfs_addrm_start(&acxt, sd->s_parent);
	sysfs_remove_one(&acxt, sd);
	sysfs_addrm_finish(&acxt);
L
Linus Torvalds 已提交
719 720
}

721
/* Remove the subdirectory entry @sd; thin exported wrapper around remove_dir(). */
void sysfs_remove_subdir(struct sysfs_dirent *sd)
{
	remove_dir(sd);
}


727
static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd)
L
Linus Torvalds 已提交
728
{
729
	struct sysfs_addrm_cxt acxt;
730
	struct sysfs_dirent **pos;
L
Linus Torvalds 已提交
731

732
	if (!dir_sd)
L
Linus Torvalds 已提交
733 734
		return;

735
	pr_debug("sysfs %s: removing dir\n", dir_sd->s_name);
736
	sysfs_addrm_start(&acxt, dir_sd);
737
	pos = &dir_sd->s_children;
738 739 740
	while (*pos) {
		struct sysfs_dirent *sd = *pos;

E
Eric W. Biederman 已提交
741
		if (sysfs_type(sd) != SYSFS_DIR)
742
			sysfs_remove_one(&acxt, sd);
743
		else
744
			pos = &(*pos)->s_sibling;
L
Linus Torvalds 已提交
745
	}
746
	sysfs_addrm_finish(&acxt);
747

748
	remove_dir(dir_sd);
749 750 751 752 753 754 755 756 757 758 759 760 761
}

/**
 *	sysfs_remove_dir - remove an object's directory.
 *	@kobj:	object.
 *
 *	The only thing special about this is that we remove any files in
 *	the directory before we remove the directory, and we've inlined
 *	what used to be sysfs_rmdir() below, instead of calling separately.
 */

void sysfs_remove_dir(struct kobject * kobj)
{
762
	struct sysfs_dirent *sd = kobj->sd;
763

T
Tejun Heo 已提交
764
	spin_lock(&sysfs_assoc_lock);
765
	kobj->sd = NULL;
T
Tejun Heo 已提交
766
	spin_unlock(&sysfs_assoc_lock);
767

768
	__sysfs_remove_dir(sd);
L
Linus Torvalds 已提交
769 770
}

771
int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
L
Linus Torvalds 已提交
772
{
773
	struct sysfs_dirent *sd = kobj->sd;
774
	struct dentry *parent = NULL;
775 776
	struct dentry *old_dentry = NULL, *new_dentry = NULL;
	const char *dup_name = NULL;
777
	int error;
L
Linus Torvalds 已提交
778

779 780
	mutex_lock(&sysfs_rename_mutex);

781 782 783 784
	error = 0;
	if (strcmp(sd->s_name, new_name) == 0)
		goto out;	/* nothing to rename */

T
Tejun Heo 已提交
785
	/* get the original dentry */
786 787 788
	old_dentry = sysfs_get_dentry(sd);
	if (IS_ERR(old_dentry)) {
		error = PTR_ERR(old_dentry);
789
		goto out;
790 791
	}

T
Tejun Heo 已提交
792
	parent = old_dentry->d_parent;
L
Linus Torvalds 已提交
793

794 795
	/* lock parent and get dentry for new name */
	mutex_lock(&parent->d_inode->i_mutex);
796
	mutex_lock(&sysfs_mutex);
L
Linus Torvalds 已提交
797

798 799
	error = -EEXIST;
	if (sysfs_find_dirent(sd->s_parent, new_name))
800
		goto out_unlock;
801

802 803 804
	error = -ENOMEM;
	new_dentry = d_alloc_name(parent, new_name);
	if (!new_dentry)
805
		goto out_unlock;
806

T
Tejun Heo 已提交
807 808 809 810
	/* rename kobject and sysfs_dirent */
	error = -ENOMEM;
	new_name = dup_name = kstrdup(new_name, GFP_KERNEL);
	if (!new_name)
811
		goto out_unlock;
T
Tejun Heo 已提交
812

813 814
	error = kobject_set_name(kobj, "%s", new_name);
	if (error)
815
		goto out_unlock;
816

817
	dup_name = sd->s_name;
T
Tejun Heo 已提交
818 819
	sd->s_name = new_name;

T
Tejun Heo 已提交
820
	/* rename */
821
	d_add(new_dentry, NULL);
E
Eric W. Biederman 已提交
822
	d_move(old_dentry, new_dentry);
823 824 825

	error = 0;
 out_unlock:
826
	mutex_unlock(&sysfs_mutex);
827
	mutex_unlock(&parent->d_inode->i_mutex);
828 829 830
	kfree(dup_name);
	dput(old_dentry);
	dput(new_dentry);
831
 out:
832
	mutex_unlock(&sysfs_rename_mutex);
L
Linus Torvalds 已提交
833 834 835
	return error;
}

836
int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
837
{
838 839 840 841
	struct sysfs_dirent *sd = kobj->sd;
	struct sysfs_dirent *new_parent_sd;
	struct dentry *old_parent, *new_parent = NULL;
	struct dentry *old_dentry = NULL, *new_dentry = NULL;
842 843
	int error;

844
	mutex_lock(&sysfs_rename_mutex);
845 846 847
	BUG_ON(!sd->s_parent);
	new_parent_sd = new_parent_kobj->sd ? new_parent_kobj->sd : &sysfs_root;

848 849 850 851
	error = 0;
	if (sd->s_parent == new_parent_sd)
		goto out;	/* nothing to move */

852 853 854 855
	/* get dentries */
	old_dentry = sysfs_get_dentry(sd);
	if (IS_ERR(old_dentry)) {
		error = PTR_ERR(old_dentry);
856
		goto out;
857
	}
E
Eric W. Biederman 已提交
858
	old_parent = old_dentry->d_parent;
859 860 861 862

	new_parent = sysfs_get_dentry(new_parent_sd);
	if (IS_ERR(new_parent)) {
		error = PTR_ERR(new_parent);
863
		goto out;
864
	}
865 866

again:
867 868 869
	mutex_lock(&old_parent->d_inode->i_mutex);
	if (!mutex_trylock(&new_parent->d_inode->i_mutex)) {
		mutex_unlock(&old_parent->d_inode->i_mutex);
870 871
		goto again;
	}
872
	mutex_lock(&sysfs_mutex);
873

874 875
	error = -EEXIST;
	if (sysfs_find_dirent(new_parent_sd, sd->s_name))
876
		goto out_unlock;
877 878 879 880 881 882 883

	error = -ENOMEM;
	new_dentry = d_alloc_name(new_parent, sd->s_name);
	if (!new_dentry)
		goto out_unlock;

	error = 0;
884
	d_add(new_dentry, NULL);
E
Eric W. Biederman 已提交
885
	d_move(old_dentry, new_dentry);
886 887 888
	dput(new_dentry);

	/* Remove from old parent's list and insert into new parent's list. */
889
	sysfs_unlink_sibling(sd);
890 891 892
	sysfs_get(new_parent_sd);
	sysfs_put(sd->s_parent);
	sd->s_parent = new_parent_sd;
893
	sysfs_link_sibling(sd);
894

895
 out_unlock:
896
	mutex_unlock(&sysfs_mutex);
897 898
	mutex_unlock(&new_parent->d_inode->i_mutex);
	mutex_unlock(&old_parent->d_inode->i_mutex);
899
 out:
900 901 902
	dput(new_parent);
	dput(old_dentry);
	dput(new_dentry);
903
	mutex_unlock(&sysfs_rename_mutex);
904 905 906
	return error;
}

L
Linus Torvalds 已提交
907 908 909 910 911 912 913 914
/*
 * Relationship between s_mode and the DT_xxx types: the four bits
 * starting at bit 12 of s_mode are handed to filldir as the type.
 */
static inline unsigned char dt_type(struct sysfs_dirent *sd)
{
	return (sd->s_mode >> 12) & 0xf;
}

/*
 * ->readdir for sysfs directories.
 *
 * f_pos 0 and 1 stand for "." and ".."; for ".." the directory's own
 * inode number is reported when it has no parent.  Above that, f_pos
 * holds the inode number of the entry to continue with; the children
 * list is walked from the first entry whose s_ino is not below f_pos.
 * Once the list is exhausted f_pos is set to INT_MAX, which later
 * calls treat as end of directory.
 *
 * Always returns 0.
 */
static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
{
	struct sysfs_dirent *parent_sd = filp->f_path.dentry->d_fsdata;
	struct sysfs_dirent *pos;
	ino_t ino;

	if (filp->f_pos == 0) {
		ino = parent_sd->s_ino;
		if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) == 0)
			filp->f_pos++;
	}
	if (filp->f_pos == 1) {
		ino = parent_sd->s_parent ? parent_sd->s_parent->s_ino
					  : parent_sd->s_ino;
		if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) == 0)
			filp->f_pos++;
	}

	/* dot entries still pending, or directory already exhausted */
	if (filp->f_pos <= 1 || filp->f_pos >= INT_MAX)
		return 0;

	mutex_lock(&sysfs_mutex);

	/* Skip the dentries we have already reported */
	pos = parent_sd->s_children;
	while (pos && filp->f_pos > pos->s_ino)
		pos = pos->s_sibling;

	while (pos) {
		ino = pos->s_ino;
		/* remember where to resume if filldir refuses this entry */
		filp->f_pos = ino;

		if (filldir(dirent, pos->s_name, strlen(pos->s_name),
			    filp->f_pos, ino, dt_type(pos)) < 0)
			break;

		pos = pos->s_sibling;
	}
	if (!pos)
		filp->f_pos = INT_MAX;

	mutex_unlock(&sysfs_mutex);
	return 0;
}

E
Eric W. Biederman 已提交
960

961
const struct file_operations sysfs_dir_operations = {
L
Linus Torvalds 已提交
962 963 964
	.read		= generic_read_dir,
	.readdir	= sysfs_readdir,
};