dir.c 21.4 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10
/*
 * dir.c - Operations for sysfs directories.
 */

#undef DEBUG

#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/module.h>
#include <linux/kobject.h>
11
#include <linux/namei.h>
12
#include <linux/idr.h>
13
#include <linux/completion.h>
D
Dave Young 已提交
14
#include <linux/mutex.h>
L
Linus Torvalds 已提交
15 16
#include "sysfs.h"

17
DEFINE_MUTEX(sysfs_mutex);
18
DEFINE_MUTEX(sysfs_rename_mutex);
T
Tejun Heo 已提交
19
spinlock_t sysfs_assoc_lock = SPIN_LOCK_UNLOCKED;
L
Linus Torvalds 已提交
20

21 22 23
static spinlock_t sysfs_ino_lock = SPIN_LOCK_UNLOCKED;
static DEFINE_IDA(sysfs_ino_ida);

24 25 26 27 28 29 30 31
/**
 *	sysfs_link_sibling - link sysfs_dirent into sibling list
 *	@sd: sysfs_dirent of interest
 *
 *	Link @sd into its sibling list which starts from
 *	sd->s_parent->s_children.
 *
 *	Locking:
32
 *	mutex_lock(sysfs_mutex)
33
 */
34
static void sysfs_link_sibling(struct sysfs_dirent *sd)
35 36
{
	struct sysfs_dirent *parent_sd = sd->s_parent;
E
Eric W. Biederman 已提交
37
	struct sysfs_dirent **pos;
38 39

	BUG_ON(sd->s_sibling);
E
Eric W. Biederman 已提交
40 41 42 43 44 45 46 47 48 49 50

	/* Store directory entries in order by ino.  This allows
	 * readdir to properly restart without having to add a
	 * cursor into the s_children list.
	 */
	for (pos = &parent_sd->s_children; *pos; pos = &(*pos)->s_sibling) {
		if (sd->s_ino < (*pos)->s_ino)
			break;
	}
	sd->s_sibling = *pos;
	*pos = sd;
51 52 53 54 55 56 57 58 59 60
}

/**
 *	sysfs_unlink_sibling - unlink sysfs_dirent from sibling list
 *	@sd: sysfs_dirent of interest
 *
 *	Unlink @sd from its sibling list which starts from
 *	sd->s_parent->s_children.
 *
 *	Locking:
61
 *	mutex_lock(sysfs_mutex)
62
 */
63
static void sysfs_unlink_sibling(struct sysfs_dirent *sd)
64 65 66 67 68 69 70 71 72 73 74 75
{
	struct sysfs_dirent **pos;

	for (pos = &sd->s_parent->s_children; *pos; pos = &(*pos)->s_sibling) {
		if (*pos == sd) {
			*pos = sd->s_sibling;
			sd->s_sibling = NULL;
			break;
		}
	}
}

T
Tejun Heo 已提交
76 77 78 79 80
/**
 *	sysfs_get_dentry - get dentry for the given sysfs_dirent
 *	@sd: sysfs_dirent of interest
 *
 *	Get dentry for @sd.  Dentry is looked up if currently not
T
Tejun Heo 已提交
81 82
 *	present.  This function descends from the root looking up
 *	dentry for each step.
T
Tejun Heo 已提交
83 84
 *
 *	LOCKING:
85
 *	mutex_lock(sysfs_rename_mutex)
T
Tejun Heo 已提交
86 87 88 89 90 91
 *
 *	RETURNS:
 *	Pointer to found dentry on success, ERR_PTR() value on error.
 */
struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
{
T
Tejun Heo 已提交
92
	struct dentry *dentry = dget(sysfs_sb->s_root);
T
Tejun Heo 已提交
93

T
Tejun Heo 已提交
94 95 96
	while (dentry->d_fsdata != sd) {
		struct sysfs_dirent *cur;
		struct dentry *parent;
T
Tejun Heo 已提交
97

T
Tejun Heo 已提交
98 99 100
		/* find the first ancestor which hasn't been looked up */
		cur = sd;
		while (cur->s_parent != dentry->d_fsdata)
T
Tejun Heo 已提交
101 102 103
			cur = cur->s_parent;

		/* look it up */
T
Tejun Heo 已提交
104 105 106
		parent = dentry;
		mutex_lock(&parent->d_inode->i_mutex);
		dentry = lookup_one_len_kern(cur->s_name, parent,
T
Tejun Heo 已提交
107
					     strlen(cur->s_name));
T
Tejun Heo 已提交
108 109
		mutex_unlock(&parent->d_inode->i_mutex);
		dput(parent);
T
Tejun Heo 已提交
110

T
Tejun Heo 已提交
111 112
		if (IS_ERR(dentry))
			break;
T
Tejun Heo 已提交
113 114 115 116
	}
	return dentry;
}

117 118 119 120 121 122 123 124 125 126 127 128
/**
 *	sysfs_get_active - get an active reference to sysfs_dirent
 *	@sd: sysfs_dirent to get an active reference to
 *
 *	Increments sd->s_active with a compare-and-exchange loop.  The
 *	attempt is abandoned as soon as the counter is seen negative.
 *	This function is a noop if @sd is NULL.
 *
 *	RETURNS:
 *	Pointer to @sd on success, NULL on failure.
 */
struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
{
	int seen, prev;

	if (unlikely(!sd))
		return NULL;

	for (;;) {
		seen = atomic_read(&sd->s_active);
		if (unlikely(seen < 0))
			break;

		prev = atomic_cmpxchg(&sd->s_active, seen, seen + 1);
		if (likely(prev == seen))
			return sd;
		if (prev < 0)
			break;

		/* lost a race against another updater; try again */
		cpu_relax();
	}

	return NULL;
}

/**
 *	sysfs_put_active - put an active reference to sysfs_dirent
 *	@sd: sysfs_dirent to put an active reference to
 *
 *	Decrements sd->s_active.  If the result is exactly
 *	SD_DEACTIVATED_BIAS, the completion whose address
 *	sysfs_deactivate() stored in sd->s_sibling is completed.
 *	This function is a noop if @sd is NULL.
 */
void sysfs_put_active(struct sysfs_dirent *sd)
{
	if (unlikely(!sd))
		return;

	if (likely(atomic_dec_return(&sd->s_active) != SD_DEACTIVATED_BIAS))
		return;

	/* atomic_dec_return() is a mb(), so the sd->s_sibling value
	 * written by sysfs_deactivate() is guaranteed to be visible here.
	 */
	complete((struct completion *)(void *)sd->s_sibling);
}

/**
 *	sysfs_get_active_two - get active references to sysfs_dirent and parent
 *	@sd: sysfs_dirent of interest
 *
 *	Takes an active reference on sd->s_parent (when there is one)
 *	and then on @sd itself.  If the second step fails the parent's
 *	reference is put back.  This function is a noop if @sd is NULL.
 *
 *	RETURNS:
 *	Pointer to @sd on success, NULL on failure.
 */
struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd)
{
	if (!sd)
		return NULL;

	/* parent first */
	if (sd->s_parent && unlikely(!sysfs_get_active(sd->s_parent)))
		return NULL;

	if (likely(sysfs_get_active(sd)))
		return sd;

	/* undo the parent reference; sysfs_put_active() accepts NULL */
	sysfs_put_active(sd->s_parent);
	return NULL;
}

/**
 *	sysfs_put_active_two - put active references to sysfs_dirent and parent
 *	@sd: sysfs_dirent of interest
 *
 *	Releases the active reference on @sd and then the one on
 *	sd->s_parent.  This function is a noop if @sd is NULL.
 */
void sysfs_put_active_two(struct sysfs_dirent *sd)
{
	if (!sd)
		return;

	sysfs_put_active(sd);
	sysfs_put_active(sd->s_parent);
}

/**
 *	sysfs_deactivate - deactivate sysfs_dirent
 *	@sd: sysfs_dirent to deactivate
 *
218
 *	Deny new active references and drain existing ones.
219
 */
220
static void sysfs_deactivate(struct sysfs_dirent *sd)
221
{
222 223
	DECLARE_COMPLETION_ONSTACK(wait);
	int v;
224

225
	BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED));
226
	sd->s_sibling = (void *)&wait;
227 228

	/* atomic_add_return() is a mb(), put_active() will always see
229
	 * the updated sd->s_sibling.
230
	 */
231 232 233 234 235
	v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active);

	if (v != SD_DEACTIVATED_BIAS)
		wait_for_completion(&wait);

236
	sd->s_sibling = NULL;
237 238
}

T
Tejun Heo 已提交
239
/**
 *	sysfs_alloc_ino - allocate an inode number from sysfs_ino_ida
 *	@pino: where to store the new number; written only on success
 *
 *	Requests an id of 2 or above.  When the IDA reports -EAGAIN it
 *	is refilled with ida_pre_get() (outside the spinlock, as that
 *	call is made with GFP_KERNEL) and the allocation is retried.
 *
 *	RETURNS:
 *	0 on success, -ENOMEM if the IDA could not be refilled, or the
 *	error returned by ida_get_new_above().
 */
static int sysfs_alloc_ino(ino_t *pino)
{
	int ino, rc;

	do {
		spin_lock(&sysfs_ino_lock);
		rc = ida_get_new_above(&sysfs_ino_ida, 2, &ino);
		spin_unlock(&sysfs_ino_lock);
	} while (rc == -EAGAIN && ida_pre_get(&sysfs_ino_ida, GFP_KERNEL));

	if (rc == -EAGAIN)
		return -ENOMEM;
	if (rc)
		return rc;

	/* Store the result only once it is valid: on failure @ino was
	 * never written and copying it out would read an uninitialized
	 * variable.
	 */
	*pino = ino;
	return 0;
}

/**
 *	sysfs_free_ino - return an inode number to sysfs_ino_ida
 *	@ino: number to release
 *
 *	The removal is done with sysfs_ino_lock held.
 */
static void sysfs_free_ino(ino_t ino)
{
	spin_lock(&sysfs_ino_lock);
	ida_remove(&sysfs_ino_ida, ino);
	spin_unlock(&sysfs_ino_lock);
}

265 266
void release_sysfs_dirent(struct sysfs_dirent * sd)
{
T
Tejun Heo 已提交
267 268 269
	struct sysfs_dirent *parent_sd;

 repeat:
270 271 272
	/* Moving/renaming is always done while holding reference.
	 * sd->s_parent won't change beneath us.
	 */
T
Tejun Heo 已提交
273 274
	parent_sd = sd->s_parent;

275
	if (sysfs_type(sd) == SYSFS_KOBJ_LINK)
T
Tejun Heo 已提交
276
		sysfs_put(sd->s_symlink.target_sd);
277
	if (sysfs_type(sd) & SYSFS_COPY_NAME)
T
Tejun Heo 已提交
278
		kfree(sd->s_name);
279
	kfree(sd->s_iattr);
280
	sysfs_free_ino(sd->s_ino);
281
	kmem_cache_free(sysfs_dir_cachep, sd);
T
Tejun Heo 已提交
282 283 284 285

	sd = parent_sd;
	if (sd && atomic_dec_and_test(&sd->s_count))
		goto repeat;
286 287
}

L
Linus Torvalds 已提交
288 289 290 291
/**
 *	sysfs_d_iput - dentry_operations.d_iput callback for sysfs
 *	@dentry: dentry being torn down
 *	@inode: inode that was attached to @dentry
 *
 *	Puts the sysfs_dirent stored in dentry->d_fsdata (set by
 *	sysfs_lookup()) and then puts @inode.
 */
static void sysfs_d_iput(struct dentry *dentry, struct inode *inode)
{
	struct sysfs_dirent *owner = dentry->d_fsdata;

	sysfs_put(owner);
	iput(inode);
}

/* Dentry operations installed on every dentry instantiated by
 * sysfs_lookup(); only d_iput is overridden.
 */
static struct dentry_operations sysfs_dentry_ops = {
	.d_iput		= sysfs_d_iput,
};

300
/**
 *	sysfs_new_dirent - allocate and initialize a sysfs_dirent
 *	@name: name of the new entry
 *	@mode: value stored in s_mode
 *	@type: value stored in s_flags; if it includes SYSFS_COPY_NAME
 *	       a private copy of @name is made, otherwise @name itself
 *	       is referenced
 *
 *	The new dirent starts with s_count 1, s_active 0 and s_event 1
 *	and has an inode number allocated for it.  It is not linked
 *	into any parent.
 *
 *	RETURNS:
 *	Pointer to the new sysfs_dirent, or NULL if any allocation
 *	failed (everything allocated so far is released).
 */
struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
{
	struct sysfs_dirent *sd;
	char *copy = NULL;

	if (type & SYSFS_COPY_NAME) {
		copy = kstrdup(name, GFP_KERNEL);
		if (!copy)
			return NULL;
		name = copy;
	}

	sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL);
	if (!sd)
		goto free_name;

	if (sysfs_alloc_ino(&sd->s_ino))
		goto free_sd;

	sd->s_name = name;
	sd->s_mode = mode;
	sd->s_flags = type;

	atomic_set(&sd->s_count, 1);
	atomic_set(&sd->s_active, 0);
	atomic_set(&sd->s_event, 1);

	return sd;

 free_sd:
	kmem_cache_free(sysfs_dir_cachep, sd);
 free_name:
	kfree(copy);	/* NULL when no copy was made */
	return NULL;
}

335 336 337 338 339 340
static int sysfs_ilookup_test(struct inode *inode, void *arg)
{
	struct sysfs_dirent *sd = arg;
	return inode->i_ino == sd->s_ino;
}

341
/**
342 343 344
 *	sysfs_addrm_start - prepare for sysfs_dirent add/remove
 *	@acxt: pointer to sysfs_addrm_cxt to be used
 *	@parent_sd: parent sysfs_dirent
345
 *
346 347 348 349 350
 *	This function is called when the caller is about to add or
 *	remove sysfs_dirent under @parent_sd.  This function acquires
 *	sysfs_mutex, grabs inode for @parent_sd if available and lock
 *	i_mutex of it.  @acxt is used to keep and pass context to
 *	other addrm functions.
351 352
 *
 *	LOCKING:
353 354 355
 *	Kernel thread context (may sleep).  sysfs_mutex is locked on
 *	return.  i_mutex of parent inode is locked on return if
 *	available.
356
 */
357 358
void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
		       struct sysfs_dirent *parent_sd)
359
{
360
	struct inode *inode;
361

362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397
	memset(acxt, 0, sizeof(*acxt));
	acxt->parent_sd = parent_sd;

	/* Lookup parent inode.  inode initialization and I_NEW
	 * clearing are protected by sysfs_mutex.  By grabbing it and
	 * looking up with _nowait variant, inode state can be
	 * determined reliably.
	 */
	mutex_lock(&sysfs_mutex);

	inode = ilookup5_nowait(sysfs_sb, parent_sd->s_ino, sysfs_ilookup_test,
				parent_sd);

	if (inode && !(inode->i_state & I_NEW)) {
		/* parent inode available */
		acxt->parent_inode = inode;

		/* sysfs_mutex is below i_mutex in lock hierarchy.
		 * First, trylock i_mutex.  If fails, unlock
		 * sysfs_mutex and lock them in order.
		 */
		if (!mutex_trylock(&inode->i_mutex)) {
			mutex_unlock(&sysfs_mutex);
			mutex_lock(&inode->i_mutex);
			mutex_lock(&sysfs_mutex);
		}
	} else
		iput(inode);
}

/**
 *	sysfs_add_one - add sysfs_dirent to parent
 *	@acxt: addrm context to use
 *	@sd: sysfs_dirent to be added
 *
 *	Get @acxt->parent_sd and set sd->s_parent to it and increment
398 399
 *	nlink of parent inode if @sd is a directory and link into the
 *	children list of the parent.
400 401 402 403 404 405 406
 *
 *	This function should be called between calls to
 *	sysfs_addrm_start() and sysfs_addrm_finish() and should be
 *	passed the same @acxt as passed to sysfs_addrm_start().
 *
 *	LOCKING:
 *	Determined by sysfs_addrm_start().
407 408 409 410
 *
 *	RETURNS:
 *	0 on success, -EEXIST if entry with the given name already
 *	exists.
411
 */
412
int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
413
{
414 415 416 417
	if (sysfs_find_dirent(acxt->parent_sd, sd->s_name)) {
		printk(KERN_WARNING "sysfs: duplicate filename '%s' "
		       "can not be created\n", sd->s_name);
		WARN_ON(1);
418
		return -EEXIST;
419
	}
420

421 422 423 424 425 426
	sd->s_parent = sysfs_get(acxt->parent_sd);

	if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
		inc_nlink(acxt->parent_inode);

	acxt->cnt++;
427 428

	sysfs_link_sibling(sd);
429 430

	return 0;
431 432 433 434 435 436 437 438
}

/**
 *	sysfs_remove_one - remove sysfs_dirent from parent
 *	@acxt: addrm context to use
 *	@sd: sysfs_dirent to be added
 *
 *	Mark @sd removed and drop nlink of parent inode if @sd is a
439
 *	directory.  @sd is unlinked from the children list.
440 441 442 443 444 445 446 447 448 449
 *
 *	This function should be called between calls to
 *	sysfs_addrm_start() and sysfs_addrm_finish() and should be
 *	passed the same @acxt as passed to sysfs_addrm_start().
 *
 *	LOCKING:
 *	Determined by sysfs_addrm_start().
 */
void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
{
450 451 452
	BUG_ON(sd->s_flags & SYSFS_FLAG_REMOVED);

	sysfs_unlink_sibling(sd);
453 454 455 456 457 458 459 460 461 462 463

	sd->s_flags |= SYSFS_FLAG_REMOVED;
	sd->s_sibling = acxt->removed;
	acxt->removed = sd;

	if (sysfs_type(sd) == SYSFS_DIR && acxt->parent_inode)
		drop_nlink(acxt->parent_inode);

	acxt->cnt++;
}

464 465 466 467 468 469 470 471 472 473 474
/**
 *	sysfs_drop_dentry - drop dentry for the specified sysfs_dirent
 *	@sd: target sysfs_dirent
 *
 *	Drop dentry for @sd.  @sd must have been unlinked from its
 *	parent on entry to this function such that it can't be looked
 *	up anymore.
 */
static void sysfs_drop_dentry(struct sysfs_dirent *sd)
{
	struct inode *inode;
475 476 477 478 479
	struct dentry *dentry;

	inode = ilookup(sysfs_sb, sd->s_ino);
	if (!inode)
		return;
480

481 482 483 484 485 486
	/* Drop any existing dentries associated with sd.
	 *
	 * For the dentry to be properly freed we need to grab a
	 * reference to the dentry under the dcache lock,  unhash it,
	 * and then put it.  The playing with the dentry count allows
	 * dput to immediately free the dentry  if it is not in use.
487
	 */
488
repeat:
489
	spin_lock(&dcache_lock);
490 491 492 493
	list_for_each_entry(dentry, &inode->i_dentry, d_alias) {
		if (d_unhashed(dentry))
			continue;
		dget_locked(dentry);
494 495 496
		spin_lock(&dentry->d_lock);
		__d_drop(dentry);
		spin_unlock(&dentry->d_lock);
497 498 499
		spin_unlock(&dcache_lock);
		dput(dentry);
		goto repeat;
500 501 502 503
	}
	spin_unlock(&dcache_lock);

	/* adjust nlink and update timestamp */
504
	mutex_lock(&inode->i_mutex);
505

506 507 508
	inode->i_ctime = CURRENT_TIME;
	drop_nlink(inode);
	if (sysfs_type(sd) == SYSFS_DIR)
509 510
		drop_nlink(inode);

511 512 513
	mutex_unlock(&inode->i_mutex);

	iput(inode);
514 515
}

516 517 518 519 520 521 522 523 524 525 526
/**
 *	sysfs_addrm_finish - finish up sysfs_dirent add/remove
 *	@acxt: addrm context to finish up
 *
 *	Finish up sysfs_dirent add/remove.  Resources acquired by
 *	sysfs_addrm_start() are released and removed sysfs_dirents are
 *	cleaned up.  Timestamps on the parent inode are updated.
 *
 *	LOCKING:
 *	All mutexes acquired by sysfs_addrm_start() are released.
 */
527
void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551
{
	/* release resources acquired by sysfs_addrm_start() */
	mutex_unlock(&sysfs_mutex);
	if (acxt->parent_inode) {
		struct inode *inode = acxt->parent_inode;

		/* if added/removed, update timestamps on the parent */
		if (acxt->cnt)
			inode->i_ctime = inode->i_mtime = CURRENT_TIME;

		mutex_unlock(&inode->i_mutex);
		iput(inode);
	}

	/* kill removed sysfs_dirents */
	while (acxt->removed) {
		struct sysfs_dirent *sd = acxt->removed;

		acxt->removed = sd->s_sibling;
		sd->s_sibling = NULL;

		sysfs_drop_dentry(sd);
		sysfs_deactivate(sd);
		sysfs_put(sd);
T
Tejun Heo 已提交
552
	}
553 554
}

555 556 557 558 559 560
/**
 *	sysfs_find_dirent - find sysfs_dirent with the given name
 *	@parent_sd: sysfs_dirent to search under
 *	@name: name to look for
 *
 *	Look for sysfs_dirent with name @name under @parent_sd.
561
 *
562
 *	LOCKING:
563
 *	mutex_lock(sysfs_mutex)
564
 *
565 566
 *	RETURNS:
 *	Pointer to sysfs_dirent if found, NULL if not.
567
 */
568 569
struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
				       const unsigned char *name)
570
{
571 572 573
	struct sysfs_dirent *sd;

	for (sd = parent_sd->s_children; sd; sd = sd->s_sibling)
E
Eric W. Biederman 已提交
574
		if (!strcmp(sd->s_name, name))
575 576 577
			return sd;
	return NULL;
}
578

579 580 581 582 583 584 585 586 587
/**
 *	sysfs_get_dirent - find and get sysfs_dirent with the given name
 *	@parent_sd: sysfs_dirent to search under
 *	@name: name to look for
 *
 *	Look for sysfs_dirent with name @name under @parent_sd and get
 *	it if found.
 *
 *	LOCKING:
588
 *	Kernel thread context (may sleep).  Grabs sysfs_mutex.
589 590 591 592 593 594 595 596 597
 *
 *	RETURNS:
 *	Pointer to sysfs_dirent if found, NULL if not.
 */
struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
				      const unsigned char *name)
{
	struct sysfs_dirent *sd;

598
	mutex_lock(&sysfs_mutex);
599 600
	sd = sysfs_find_dirent(parent_sd, name);
	sysfs_get(sd);
601
	mutex_unlock(&sysfs_mutex);
602 603

	return sd;
604 605
}

606 607
static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
		      const char *name, struct sysfs_dirent **p_sd)
L
Linus Torvalds 已提交
608 609
{
	umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
610
	struct sysfs_addrm_cxt acxt;
611
	struct sysfs_dirent *sd;
612
	int rc;
L
Linus Torvalds 已提交
613

614
	/* allocate */
615
	sd = sysfs_new_dirent(name, mode, SYSFS_DIR);
616
	if (!sd)
617
		return -ENOMEM;
T
Tejun Heo 已提交
618
	sd->s_dir.kobj = kobj;
619

620
	/* link in */
621
	sysfs_addrm_start(&acxt, parent_sd);
622 623
	rc = sysfs_add_one(&acxt, sd);
	sysfs_addrm_finish(&acxt);
624

625 626 627
	if (rc == 0)
		*p_sd = sd;
	else
628
		sysfs_put(sd);
629

630
	return rc;
L
Linus Torvalds 已提交
631 632
}

633 634
int sysfs_create_subdir(struct kobject *kobj, const char *name,
			struct sysfs_dirent **p_sd)
L
Linus Torvalds 已提交
635
{
636
	return create_dir(kobj, kobj->sd, name, p_sd);
L
Linus Torvalds 已提交
637 638 639 640 641 642
}

/**
 *	sysfs_create_dir - create a directory for an object.
 *	@kobj:		object we're creating directory for. 
 */
643
int sysfs_create_dir(struct kobject * kobj)
L
Linus Torvalds 已提交
644
{
645
	struct sysfs_dirent *parent_sd, *sd;
L
Linus Torvalds 已提交
646 647 648 649
	int error = 0;

	BUG_ON(!kobj);

650
	if (kobj->parent)
651
		parent_sd = kobj->parent->sd;
L
Linus Torvalds 已提交
652
	else
E
Eric W. Biederman 已提交
653
		parent_sd = &sysfs_root;
L
Linus Torvalds 已提交
654

655
	error = create_dir(kobj, parent_sd, kobject_name(kobj), &sd);
L
Linus Torvalds 已提交
656
	if (!error)
657
		kobj->sd = sd;
L
Linus Torvalds 已提交
658 659 660 661 662 663
	return error;
}

static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
				struct nameidata *nd)
{
664
	struct dentry *ret = NULL;
665 666
	struct sysfs_dirent *parent_sd = dentry->d_parent->d_fsdata;
	struct sysfs_dirent *sd;
667
	struct inode *inode;
L
Linus Torvalds 已提交
668

669 670
	mutex_lock(&sysfs_mutex);

671
	sd = sysfs_find_dirent(parent_sd, dentry->d_name.name);
L
Linus Torvalds 已提交
672

673
	/* no such entry */
674
	if (!sd)
675
		goto out_unlock;
676 677

	/* attach dentry and inode */
678
	inode = sysfs_get_inode(sd);
679 680 681 682
	if (!inode) {
		ret = ERR_PTR(-ENOMEM);
		goto out_unlock;
	}
683

T
Tejun Heo 已提交
684 685 686
	/* instantiate and hash dentry */
	dentry->d_op = &sysfs_dentry_ops;
	dentry->d_fsdata = sysfs_get(sd);
687
	d_instantiate(dentry, inode);
T
Tejun Heo 已提交
688
	d_rehash(dentry);
689

690
 out_unlock:
691
	mutex_unlock(&sysfs_mutex);
692
	return ret;
L
Linus Torvalds 已提交
693 694
}

695
const struct inode_operations sysfs_dir_inode_operations = {
L
Linus Torvalds 已提交
696
	.lookup		= sysfs_lookup,
697
	.setattr	= sysfs_setattr,
L
Linus Torvalds 已提交
698 699
};

700
static void remove_dir(struct sysfs_dirent *sd)
L
Linus Torvalds 已提交
701
{
702
	struct sysfs_addrm_cxt acxt;
L
Linus Torvalds 已提交
703

704 705 706
	sysfs_addrm_start(&acxt, sd->s_parent);
	sysfs_remove_one(&acxt, sd);
	sysfs_addrm_finish(&acxt);
L
Linus Torvalds 已提交
707 708
}

709
/**
 *	sysfs_remove_subdir - remove a subdirectory sysfs_dirent
 *	@sd: sysfs_dirent of the subdirectory to remove
 *
 *	Thin exported wrapper around remove_dir().
 */
void sysfs_remove_subdir(struct sysfs_dirent *sd)
{
	remove_dir(sd);
}


715
static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd)
L
Linus Torvalds 已提交
716
{
717
	struct sysfs_addrm_cxt acxt;
718
	struct sysfs_dirent **pos;
L
Linus Torvalds 已提交
719

720
	if (!dir_sd)
L
Linus Torvalds 已提交
721 722
		return;

723
	pr_debug("sysfs %s: removing dir\n", dir_sd->s_name);
724
	sysfs_addrm_start(&acxt, dir_sd);
725
	pos = &dir_sd->s_children;
726 727 728
	while (*pos) {
		struct sysfs_dirent *sd = *pos;

E
Eric W. Biederman 已提交
729
		if (sysfs_type(sd) != SYSFS_DIR)
730
			sysfs_remove_one(&acxt, sd);
731
		else
732
			pos = &(*pos)->s_sibling;
L
Linus Torvalds 已提交
733
	}
734
	sysfs_addrm_finish(&acxt);
735

736
	remove_dir(dir_sd);
737 738 739 740 741 742 743 744 745 746 747 748 749
}

/**
 *	sysfs_remove_dir - remove an object's directory.
 *	@kobj:	object.
 *
 *	The only thing special about this is that we remove any files in
 *	the directory before we remove the directory, and we've inlined
 *	what used to be sysfs_rmdir() below, instead of calling separately.
 */

void sysfs_remove_dir(struct kobject * kobj)
{
750
	struct sysfs_dirent *sd = kobj->sd;
751

T
Tejun Heo 已提交
752
	spin_lock(&sysfs_assoc_lock);
753
	kobj->sd = NULL;
T
Tejun Heo 已提交
754
	spin_unlock(&sysfs_assoc_lock);
755

756
	__sysfs_remove_dir(sd);
L
Linus Torvalds 已提交
757 758
}

759
int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
L
Linus Torvalds 已提交
760
{
761
	struct sysfs_dirent *sd = kobj->sd;
762
	struct dentry *parent = NULL;
763 764
	struct dentry *old_dentry = NULL, *new_dentry = NULL;
	const char *dup_name = NULL;
765
	int error;
L
Linus Torvalds 已提交
766

767 768
	mutex_lock(&sysfs_rename_mutex);

769 770 771 772
	error = 0;
	if (strcmp(sd->s_name, new_name) == 0)
		goto out;	/* nothing to rename */

T
Tejun Heo 已提交
773
	/* get the original dentry */
774 775 776
	old_dentry = sysfs_get_dentry(sd);
	if (IS_ERR(old_dentry)) {
		error = PTR_ERR(old_dentry);
777
		goto out;
778 779
	}

T
Tejun Heo 已提交
780
	parent = old_dentry->d_parent;
L
Linus Torvalds 已提交
781

782 783
	/* lock parent and get dentry for new name */
	mutex_lock(&parent->d_inode->i_mutex);
784
	mutex_lock(&sysfs_mutex);
L
Linus Torvalds 已提交
785

786 787
	error = -EEXIST;
	if (sysfs_find_dirent(sd->s_parent, new_name))
788
		goto out_unlock;
789

790 791 792
	error = -ENOMEM;
	new_dentry = d_alloc_name(parent, new_name);
	if (!new_dentry)
793
		goto out_unlock;
794

T
Tejun Heo 已提交
795 796 797 798
	/* rename kobject and sysfs_dirent */
	error = -ENOMEM;
	new_name = dup_name = kstrdup(new_name, GFP_KERNEL);
	if (!new_name)
799
		goto out_unlock;
T
Tejun Heo 已提交
800

801 802
	error = kobject_set_name(kobj, "%s", new_name);
	if (error)
803
		goto out_unlock;
804

805
	dup_name = sd->s_name;
T
Tejun Heo 已提交
806 807
	sd->s_name = new_name;

T
Tejun Heo 已提交
808
	/* rename */
809
	d_add(new_dentry, NULL);
E
Eric W. Biederman 已提交
810
	d_move(old_dentry, new_dentry);
811 812 813

	error = 0;
 out_unlock:
814
	mutex_unlock(&sysfs_mutex);
815
	mutex_unlock(&parent->d_inode->i_mutex);
816 817 818
	kfree(dup_name);
	dput(old_dentry);
	dput(new_dentry);
819
 out:
820
	mutex_unlock(&sysfs_rename_mutex);
L
Linus Torvalds 已提交
821 822 823
	return error;
}

824
int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
825
{
826 827 828 829
	struct sysfs_dirent *sd = kobj->sd;
	struct sysfs_dirent *new_parent_sd;
	struct dentry *old_parent, *new_parent = NULL;
	struct dentry *old_dentry = NULL, *new_dentry = NULL;
830 831
	int error;

832
	mutex_lock(&sysfs_rename_mutex);
833 834 835
	BUG_ON(!sd->s_parent);
	new_parent_sd = new_parent_kobj->sd ? new_parent_kobj->sd : &sysfs_root;

836 837 838 839
	error = 0;
	if (sd->s_parent == new_parent_sd)
		goto out;	/* nothing to move */

840 841 842 843
	/* get dentries */
	old_dentry = sysfs_get_dentry(sd);
	if (IS_ERR(old_dentry)) {
		error = PTR_ERR(old_dentry);
844
		goto out;
845
	}
E
Eric W. Biederman 已提交
846
	old_parent = old_dentry->d_parent;
847 848 849 850

	new_parent = sysfs_get_dentry(new_parent_sd);
	if (IS_ERR(new_parent)) {
		error = PTR_ERR(new_parent);
851
		goto out;
852
	}
853 854

again:
855 856 857
	mutex_lock(&old_parent->d_inode->i_mutex);
	if (!mutex_trylock(&new_parent->d_inode->i_mutex)) {
		mutex_unlock(&old_parent->d_inode->i_mutex);
858 859
		goto again;
	}
860
	mutex_lock(&sysfs_mutex);
861

862 863
	error = -EEXIST;
	if (sysfs_find_dirent(new_parent_sd, sd->s_name))
864
		goto out_unlock;
865 866 867 868 869 870 871

	error = -ENOMEM;
	new_dentry = d_alloc_name(new_parent, sd->s_name);
	if (!new_dentry)
		goto out_unlock;

	error = 0;
872
	d_add(new_dentry, NULL);
E
Eric W. Biederman 已提交
873
	d_move(old_dentry, new_dentry);
874 875 876
	dput(new_dentry);

	/* Remove from old parent's list and insert into new parent's list. */
877
	sysfs_unlink_sibling(sd);
878 879 880
	sysfs_get(new_parent_sd);
	sysfs_put(sd->s_parent);
	sd->s_parent = new_parent_sd;
881
	sysfs_link_sibling(sd);
882

883
 out_unlock:
884
	mutex_unlock(&sysfs_mutex);
885 886
	mutex_unlock(&new_parent->d_inode->i_mutex);
	mutex_unlock(&old_parent->d_inode->i_mutex);
887
 out:
888 889 890
	dput(new_parent);
	dput(old_dentry);
	dput(new_dentry);
891
	mutex_unlock(&sysfs_rename_mutex);
892 893 894
	return error;
}

L
Linus Torvalds 已提交
895 896 897 898 899 900 901 902
/* Relationship between s_mode and the DT_xxx types: the value handed to
 * filldir is the 4-bit field that starts at bit 12 of s_mode.
 */
static inline unsigned char dt_type(struct sysfs_dirent *sd)
{
	return (sd->s_mode >> 12) & 0xf;
}

/**
 *	sysfs_readdir - file_operations.readdir for sysfs directories
 *	@filp: open directory; f_pos is the resume cursor
 *	@dirent: opaque buffer cookie handed through to @filldir
 *	@filldir: callback that emits one directory entry
 *
 *	f_pos 0 and 1 stand for "." and "..".  After that f_pos holds
 *	the s_ino of the entry being emitted; because the children
 *	list is sorted by s_ino, a later call resumes at the first
 *	child whose s_ino is not below f_pos.  f_pos is set to INT_MAX
 *	once the whole list has been emitted.
 *
 *	RETURNS:
 *	Always 0.
 */
static int sysfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
	struct dentry *dentry = filp->f_path.dentry;
	struct sysfs_dirent *parent_sd = dentry->d_fsdata;
	struct sysfs_dirent *pos;
	ino_t ino;

	if (filp->f_pos == 0) {
		ino = parent_sd->s_ino;
		if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) == 0)
			filp->f_pos++;
	}

	if (filp->f_pos == 1) {
		/* a directory without a parent reports itself as ".." */
		ino = parent_sd->s_parent ? parent_sd->s_parent->s_ino
					  : parent_sd->s_ino;
		if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) == 0)
			filp->f_pos++;
	}

	if (filp->f_pos <= 1 || filp->f_pos >= INT_MAX)
		return 0;

	mutex_lock(&sysfs_mutex);

	/* Skip the dentries we have already reported */
	pos = parent_sd->s_children;
	while (pos && (filp->f_pos > pos->s_ino))
		pos = pos->s_sibling;

	while (pos) {
		ino = pos->s_ino;
		filp->f_pos = ino;

		if (filldir(dirent, pos->s_name, strlen(pos->s_name),
			    filp->f_pos, ino, dt_type(pos)) < 0)
			break;	/* f_pos stays on this entry */

		pos = pos->s_sibling;
	}

	if (!pos)
		filp->f_pos = INT_MAX;

	mutex_unlock(&sysfs_mutex);
	return 0;
}

E
Eric W. Biederman 已提交
948

949
const struct file_operations sysfs_dir_operations = {
L
Linus Torvalds 已提交
950 951 952
	.read		= generic_read_dir,
	.readdir	= sysfs_readdir,
};