dir.c 27.2 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10
/*
 * dir.c - Operations for sysfs directories.
 */

#undef DEBUG

#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/module.h>
#include <linux/kobject.h>
11
#include <linux/namei.h>
12
#include <linux/idr.h>
13
#include <linux/completion.h>
14
#include <asm/semaphore.h>
L
Linus Torvalds 已提交
15 16
#include "sysfs.h"

17
DEFINE_MUTEX(sysfs_mutex);
T
Tejun Heo 已提交
18
spinlock_t sysfs_assoc_lock = SPIN_LOCK_UNLOCKED;
L
Linus Torvalds 已提交
19

20 21 22
static spinlock_t sysfs_ino_lock = SPIN_LOCK_UNLOCKED;
static DEFINE_IDA(sysfs_ino_ida);

23 24 25 26 27 28 29 30
/**
 *	sysfs_link_sibling - link sysfs_dirent into sibling list
 *	@sd: sysfs_dirent of interest
 *
 *	Link @sd into its sibling list which starts from
 *	sd->s_parent->s_children.
 *
 *	Locking:
31
 *	mutex_lock(sysfs_mutex)
32
 */
33
void sysfs_link_sibling(struct sysfs_dirent *sd)
34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
{
	struct sysfs_dirent *parent_sd = sd->s_parent;

	BUG_ON(sd->s_sibling);
	sd->s_sibling = parent_sd->s_children;
	parent_sd->s_children = sd;
}

/**
 *	sysfs_unlink_sibling - unlink sysfs_dirent from sibling list
 *	@sd: sysfs_dirent of interest
 *
 *	Unlink @sd from its sibling list which starts from
 *	sd->s_parent->s_children.
 *
 *	Locking:
50
 *	mutex_lock(sysfs_mutex)
51
 */
52
void sysfs_unlink_sibling(struct sysfs_dirent *sd)
53 54 55 56 57 58 59 60 61 62 63 64
{
	struct sysfs_dirent **pos;

	for (pos = &sd->s_parent->s_children; *pos; pos = &(*pos)->s_sibling) {
		if (*pos == sd) {
			*pos = sd->s_sibling;
			sd->s_sibling = NULL;
			break;
		}
	}
}

65 66 67 68 69 70 71 72 73 74 75 76
/**
 *	sysfs_get_active - get an active reference to sysfs_dirent
 *	@sd: sysfs_dirent to get an active reference to
 *
 *	Get an active reference of @sd.  This function is noop if @sd
 *	is NULL.  The reference is refused once @sd->s_active has been
 *	observed negative.
 *
 *	RETURNS:
 *	Pointer to @sd on success, NULL on failure.
 */
struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
{
	if (unlikely(!sd))
		return NULL;

	while (1) {
		int v, t;

		v = atomic_read(&sd->s_active);
		if (unlikely(v < 0))
			return NULL;

		/* increment only if nobody changed s_active under us */
		t = atomic_cmpxchg(&sd->s_active, v, v + 1);
		if (likely(t == v))
			return sd;
		if (t < 0)
			return NULL;

		/* lost the race against another update, retry */
		cpu_relax();
	}
}

/**
 *	sysfs_put_active - put an active reference to sysfs_dirent
 *	@sd: sysfs_dirent to put an active reference to
 *
 *	Put an active reference to @sd.  This function is noop if @sd
 *	is NULL.  When the count drops to exactly SD_DEACTIVATED_BIAS
 *	the completion stashed in @sd->s_sibling by sysfs_deactivate()
 *	is completed.
 */
void sysfs_put_active(struct sysfs_dirent *sd)
{
	struct completion *cmpl;
	int v;

	if (unlikely(!sd))
		return;

	v = atomic_dec_return(&sd->s_active);
	if (likely(v != SD_DEACTIVATED_BIAS))
		return;

	/* atomic_dec_return() is a mb(), we'll always see the updated
	 * sd->s_sibling.
	 */
	cmpl = (void *)sd->s_sibling;
	complete(cmpl);
}

/**
 *	sysfs_get_active_two - get active references to sysfs_dirent and parent
 *	@sd: sysfs_dirent of interest
 *
 *	Grab an active reference on @sd's parent (when it has one) and
 *	then on @sd itself.  If the second step fails the parent's
 *	reference is dropped again.  Noop if @sd is NULL.
 *
 *	RETURNS:
 *	Pointer to @sd on success, NULL on failure.
 */
struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd)
{
	struct sysfs_dirent *parent;

	if (!sd)
		return NULL;

	parent = sd->s_parent;
	if (parent && unlikely(!sysfs_get_active(parent)))
		return NULL;

	if (likely(sysfs_get_active(sd)))
		return sd;

	/* undo the parent reference; put_active ignores a NULL parent */
	sysfs_put_active(parent);
	return NULL;
}

/**
 *	sysfs_put_active_two - put active references to sysfs_dirent and parent
 *	@sd: sysfs_dirent of interest
 *
 *	Drop the active reference on @sd first, then the one on its
 *	parent.  Noop if @sd is NULL.
 */
void sysfs_put_active_two(struct sysfs_dirent *sd)
{
	if (!sd)
		return;

	sysfs_put_active(sd);
	sysfs_put_active(sd->s_parent);
}

/**
 *	sysfs_deactivate - deactivate sysfs_dirent
 *	@sd: sysfs_dirent to deactivate
 *
166
 *	Deny new active references and drain existing ones.
167
 */
168
static void sysfs_deactivate(struct sysfs_dirent *sd)
169
{
170 171
	DECLARE_COMPLETION_ONSTACK(wait);
	int v;
172

173
	BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED));
174
	sd->s_sibling = (void *)&wait;
175 176

	/* atomic_add_return() is a mb(), put_active() will always see
177
	 * the updated sd->s_sibling.
178
	 */
179 180 181 182 183
	v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active);

	if (v != SD_DEACTIVATED_BIAS)
		wait_for_completion(&wait);

184
	sd->s_sibling = NULL;
185 186
}

T
Tejun Heo 已提交
187
/**
 *	sysfs_alloc_ino - allocate an inode number from sysfs_ino_ida
 *	@pino: out parameter, written only on success
 *
 *	Allocates the lowest free id >= 2 under sysfs_ino_lock.  When
 *	the ida reports -EAGAIN it is refilled with GFP_KERNEL (may
 *	sleep) and the allocation is retried.
 *
 *	RETURNS:
 *	0 on success, -ENOMEM if the refill fails, or the error from
 *	ida_get_new_above().
 */
static int sysfs_alloc_ino(ino_t *pino)
{
	int ino, rc;

 retry:
	spin_lock(&sysfs_ino_lock);
	rc = ida_get_new_above(&sysfs_ino_ida, 2, &ino);
	spin_unlock(&sysfs_ino_lock);

	if (rc == -EAGAIN) {
		if (ida_pre_get(&sysfs_ino_ida, GFP_KERNEL))
			goto retry;
		rc = -ENOMEM;
	}

	/* ino is only meaningful on success; don't leak garbage out */
	if (!rc)
		*pino = ino;
	return rc;
}

/* Return @ino to sysfs_ino_ida; serialized by sysfs_ino_lock. */
static void sysfs_free_ino(ino_t ino)
{
	spin_lock(&sysfs_ino_lock);
	ida_remove(&sysfs_ino_ida, ino);
	spin_unlock(&sysfs_ino_lock);
}

213 214
void release_sysfs_dirent(struct sysfs_dirent * sd)
{
T
Tejun Heo 已提交
215 216 217
	struct sysfs_dirent *parent_sd;

 repeat:
218 219 220
	/* Moving/renaming is always done while holding reference.
	 * sd->s_parent won't change beneath us.
	 */
T
Tejun Heo 已提交
221 222
	parent_sd = sd->s_parent;

223
	if (sysfs_type(sd) == SYSFS_KOBJ_LINK)
224
		sysfs_put(sd->s_elem.symlink.target_sd);
225
	if (sysfs_type(sd) & SYSFS_COPY_NAME)
T
Tejun Heo 已提交
226
		kfree(sd->s_name);
227
	kfree(sd->s_iattr);
228
	sysfs_free_ino(sd->s_ino);
229
	kmem_cache_free(sysfs_dir_cachep, sd);
T
Tejun Heo 已提交
230 231 232 233

	sd = parent_sd;
	if (sd && atomic_dec_and_test(&sd->s_count))
		goto repeat;
234 235
}

L
Linus Torvalds 已提交
236 237 238 239 240
static void sysfs_d_iput(struct dentry * dentry, struct inode * inode)
{
	struct sysfs_dirent * sd = dentry->d_fsdata;

	if (sd) {
T
Tejun Heo 已提交
241 242
		/* sd->s_dentry is protected with sysfs_assoc_lock.
		 * This allows sysfs_drop_dentry() to dereference it.
243
		 */
T
Tejun Heo 已提交
244
		spin_lock(&sysfs_assoc_lock);
245 246 247 248 249 250 251 252

		/* The dentry might have been deleted or another
		 * lookup could have happened updating sd->s_dentry to
		 * point the new dentry.  Ignore if it isn't pointing
		 * to this dentry.
		 */
		if (sd->s_dentry == dentry)
			sd->s_dentry = NULL;
T
Tejun Heo 已提交
253
		spin_unlock(&sysfs_assoc_lock);
L
Linus Torvalds 已提交
254 255 256 257 258 259 260 261 262
		sysfs_put(sd);
	}
	iput(inode);
}

/* dentry operations installed on sysfs dentries by sysfs_attach_dentry() */
static struct dentry_operations sysfs_dentry_ops = {
	.d_iput		= sysfs_d_iput,
};

263
/**
 *	sysfs_new_dirent - allocate and initialize a sysfs_dirent
 *	@name: name of the new entry
 *	@mode: mode stored in s_mode
 *	@type: type/flags stored in s_flags
 *
 *	If @type has SYSFS_COPY_NAME set, @name is duplicated and the
 *	copy is owned by the new dirent; otherwise the caller's pointer
 *	is stored as is.  The dirent starts with s_count == 1,
 *	s_active == 0 and s_event == 1 and has an inode number
 *	allocated.  May sleep (GFP_KERNEL).
 *
 *	RETURNS:
 *	Pointer to the new sysfs_dirent, NULL on allocation failure.
 */
struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
{
	char *dup_name = NULL;
	struct sysfs_dirent *sd;

	if (type & SYSFS_COPY_NAME) {
		name = dup_name = kstrdup(name, GFP_KERNEL);
		if (!name)
			goto err_out1;
	}

	sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL);
	if (!sd)
		goto err_out1;

	if (sysfs_alloc_ino(&sd->s_ino))
		goto err_out2;

	atomic_set(&sd->s_count, 1);
	atomic_set(&sd->s_active, 0);
	atomic_set(&sd->s_event, 1);

	sd->s_name = name;
	sd->s_mode = mode;
	sd->s_flags = type;

	return sd;

	/* kmem_cache_free() must not see NULL, so unwind in two steps */
 err_out2:
	kmem_cache_free(sysfs_dir_cachep, sd);
 err_out1:
	kfree(dup_name);
	return NULL;
}

297 298 299 300 301 302 303 304 305 306 307
/**
 *	sysfs_attach_dentry - associate sysfs_dirent with dentry
 *	@sd: target sysfs_dirent
 *	@dentry: dentry to associate
 *
 *	Associate @sd with @dentry.  This is protected by
 *	sysfs_assoc_lock to avoid race with sysfs_d_iput().
 *
 *	LOCKING:
 *	mutex_lock(sysfs_mutex)
 */
308 309 310 311 312 313
static void sysfs_attach_dentry(struct sysfs_dirent *sd, struct dentry *dentry)
{
	dentry->d_op = &sysfs_dentry_ops;
	dentry->d_fsdata = sysfs_get(sd);

	/* protect sd->s_dentry against sysfs_d_iput */
T
Tejun Heo 已提交
314
	spin_lock(&sysfs_assoc_lock);
315
	sd->s_dentry = dentry;
T
Tejun Heo 已提交
316
	spin_unlock(&sysfs_assoc_lock);
317 318 319 320

	d_rehash(dentry);
}

321 322 323 324 325 326
static int sysfs_ilookup_test(struct inode *inode, void *arg)
{
	struct sysfs_dirent *sd = arg;
	return inode->i_ino == sd->s_ino;
}

327
/**
328 329 330
 *	sysfs_addrm_start - prepare for sysfs_dirent add/remove
 *	@acxt: pointer to sysfs_addrm_cxt to be used
 *	@parent_sd: parent sysfs_dirent
331
 *
332 333 334 335 336
 *	This function is called when the caller is about to add or
 *	remove sysfs_dirent under @parent_sd.  This function acquires
 *	sysfs_mutex, grabs inode for @parent_sd if available and lock
 *	i_mutex of it.  @acxt is used to keep and pass context to
 *	other addrm functions.
337 338
 *
 *	LOCKING:
339 340 341
 *	Kernel thread context (may sleep).  sysfs_mutex is locked on
 *	return.  i_mutex of parent inode is locked on return if
 *	available.
342
 */
343 344
void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
		       struct sysfs_dirent *parent_sd)
345
{
346
	struct inode *inode;
347

348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436
	memset(acxt, 0, sizeof(*acxt));
	acxt->parent_sd = parent_sd;

	/* Lookup parent inode.  inode initialization and I_NEW
	 * clearing are protected by sysfs_mutex.  By grabbing it and
	 * looking up with _nowait variant, inode state can be
	 * determined reliably.
	 */
	mutex_lock(&sysfs_mutex);

	inode = ilookup5_nowait(sysfs_sb, parent_sd->s_ino, sysfs_ilookup_test,
				parent_sd);

	if (inode && !(inode->i_state & I_NEW)) {
		/* parent inode available */
		acxt->parent_inode = inode;

		/* sysfs_mutex is below i_mutex in lock hierarchy.
		 * First, trylock i_mutex.  If fails, unlock
		 * sysfs_mutex and lock them in order.
		 */
		if (!mutex_trylock(&inode->i_mutex)) {
			mutex_unlock(&sysfs_mutex);
			mutex_lock(&inode->i_mutex);
			mutex_lock(&sysfs_mutex);
		}
	} else
		iput(inode);
}

/**
 *	sysfs_add_one - add sysfs_dirent to parent
 *	@acxt: addrm context to use
 *	@sd: sysfs_dirent to be added
 *
 *	Take a reference on @acxt->parent_sd and record it as
 *	sd->s_parent.  If @sd is a directory and the parent inode is
 *	held in @acxt, the parent inode's nlink is incremented.  @sd
 *	is NOT put on the parent's children list; callers that want
 *	that must call sysfs_link_sibling() afterwards.
 *
 *	Must be called between sysfs_addrm_start() and
 *	sysfs_addrm_finish() with the same @acxt.
 *
 *	LOCKING:
 *	Determined by sysfs_addrm_start().
 */
void sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
{
	struct sysfs_dirent *parent = sysfs_get(acxt->parent_sd);

	sd->s_parent = parent;

	if (sysfs_type(sd) == SYSFS_DIR) {
		if (acxt->parent_inode)
			inc_nlink(acxt->parent_inode);
	}

	acxt->cnt += 1;
}

/**
 *	sysfs_remove_one - remove sysfs_dirent from parent
 *	@acxt: addrm context to use
 *	@sd: sysfs_dirent to be removed
 *
 *	Flag @sd as SYSFS_FLAG_REMOVED, chain it onto @acxt->removed
 *	through s_sibling and, if @sd is a directory and the parent
 *	inode is held in @acxt, drop the parent inode's nlink.  @sd is
 *	NOT unlinked from the parent's children list here; the caller
 *	is responsible for having done that before calling.
 *
 *	Must be called between sysfs_addrm_start() and
 *	sysfs_addrm_finish() with the same @acxt.
 *
 *	LOCKING:
 *	Determined by sysfs_addrm_start().
 */
void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
{
	/* must be off the sibling list and not yet removed */
	BUG_ON((sd->s_flags & SYSFS_FLAG_REMOVED) || sd->s_sibling);

	sd->s_flags |= SYSFS_FLAG_REMOVED;

	/* push onto the list processed by sysfs_addrm_finish() */
	sd->s_sibling = acxt->removed;
	acxt->removed = sd;

	if (sysfs_type(sd) == SYSFS_DIR) {
		if (acxt->parent_inode)
			drop_nlink(acxt->parent_inode);
	}

	acxt->cnt += 1;
}

437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492
/**
 *	sysfs_drop_dentry - drop dentry for the specified sysfs_dirent
 *	@sd: target sysfs_dirent
 *
 *	Drop dentry for @sd.  @sd must have been unlinked from its
 *	parent on entry to this function such that it can't be looked
 *	up anymore.
 *
 *	@sd->s_dentry which is protected with sysfs_assoc_lock points
 *	to the currently associated dentry but we're not holding a
 *	reference to it and racing with dput().  Grab dcache_lock and
 *	verify dentry before dropping it.  If @sd->s_dentry is NULL or
 *	dput() beats us, no need to bother.
 *
 *	Afterwards, if an inode for @sd->s_ino is still cached, its
 *	ctime is updated and its nlink dropped (twice for directories).
 */
static void sysfs_drop_dentry(struct sysfs_dirent *sd)
{
	struct dentry *dentry = NULL;
	struct inode *inode;

	/* We're not holding a reference to ->s_dentry dentry but the
	 * field will stay valid as long as sysfs_assoc_lock is held.
	 */
	spin_lock(&sysfs_assoc_lock);
	spin_lock(&dcache_lock);

	/* drop dentry if it's there and dput() didn't kill it yet */
	if (sd->s_dentry && sd->s_dentry->d_inode) {
		dentry = dget_locked(sd->s_dentry);
		spin_lock(&dentry->d_lock);
		__d_drop(dentry);
		spin_unlock(&dentry->d_lock);
	}

	spin_unlock(&dcache_lock);
	spin_unlock(&sysfs_assoc_lock);

	/* release the reference taken above; dentry stays NULL if none was */
	dput(dentry);
	/* XXX: unpin if directory, this will go away soon */
	if (sysfs_type(sd) == SYSFS_DIR)
		dput(dentry);

	/* adjust nlink and update timestamp */
	inode = ilookup(sysfs_sb, sd->s_ino);
	if (inode) {
		mutex_lock(&inode->i_mutex);

		inode->i_ctime = CURRENT_TIME;
		drop_nlink(inode);
		/* directories carry one extra link, see create_dir() */
		if (sysfs_type(sd) == SYSFS_DIR)
			drop_nlink(inode);

		mutex_unlock(&inode->i_mutex);
		iput(inode);
	}
}

493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531
/**
 *	sysfs_addrm_finish - finish up sysfs_dirent add/remove
 *	@acxt: addrm context to finish up
 *
 *	Finish up sysfs_dirent add/remove.  Resources acquired by
 *	sysfs_addrm_start() are released and removed sysfs_dirents are
 *	cleaned up.  Timestamps on the parent inode are updated.
 *
 *	LOCKING:
 *	All mutexes acquired by sysfs_addrm_start() are released.
 *
 *	RETURNS:
 *	Number of added/removed sysfs_dirents since sysfs_addrm_start().
 */
int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
{
	/* release resources acquired by sysfs_addrm_start() */
	mutex_unlock(&sysfs_mutex);
	if (acxt->parent_inode) {
		struct inode *inode = acxt->parent_inode;

		/* if added/removed, update timestamps on the parent */
		if (acxt->cnt)
			inode->i_ctime = inode->i_mtime = CURRENT_TIME;

		mutex_unlock(&inode->i_mutex);
		iput(inode);
	}

	/* kill removed sysfs_dirents */
	while (acxt->removed) {
		struct sysfs_dirent *sd = acxt->removed;

		/* pop from the list built by sysfs_remove_one() */
		acxt->removed = sd->s_sibling;
		sd->s_sibling = NULL;

		sysfs_drop_dentry(sd);
		sysfs_deactivate(sd);
		sysfs_put(sd);
	}

	return acxt->cnt;
}

537 538 539 540 541 542
/**
 *	sysfs_find_dirent - find sysfs_dirent with the given name
 *	@parent_sd: sysfs_dirent to search under
 *	@name: name to look for
 *
 *	Look for sysfs_dirent with name @name under @parent_sd.
543
 *
544
 *	LOCKING:
545
 *	mutex_lock(sysfs_mutex)
546
 *
547 548
 *	RETURNS:
 *	Pointer to sysfs_dirent if found, NULL if not.
549
 */
550 551
struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
				       const unsigned char *name)
552
{
553 554 555 556 557 558 559
	struct sysfs_dirent *sd;

	for (sd = parent_sd->s_children; sd; sd = sd->s_sibling)
		if (sysfs_type(sd) && !strcmp(sd->s_name, name))
			return sd;
	return NULL;
}
560

561 562 563 564 565 566 567 568 569
/**
 *	sysfs_get_dirent - find and get sysfs_dirent with the given name
 *	@parent_sd: sysfs_dirent to search under
 *	@name: name to look for
 *
 *	Look for sysfs_dirent with name @name under @parent_sd and get
 *	it if found.
 *
 *	LOCKING:
570
 *	Kernel thread context (may sleep).  Grabs sysfs_mutex.
571 572 573 574 575 576 577 578 579
 *
 *	RETURNS:
 *	Pointer to sysfs_dirent if found, NULL if not.
 */
struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
				      const unsigned char *name)
{
	struct sysfs_dirent *sd;

580
	mutex_lock(&sysfs_mutex);
581 582
	sd = sysfs_find_dirent(parent_sd, name);
	sysfs_get(sd);
583
	mutex_unlock(&sysfs_mutex);
584 585

	return sd;
586 587
}

588 589
static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
		      const char *name, struct sysfs_dirent **p_sd)
L
Linus Torvalds 已提交
590
{
591
	struct dentry *parent = parent_sd->s_dentry;
592
	struct sysfs_addrm_cxt acxt;
L
Linus Torvalds 已提交
593 594
	int error;
	umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
595
	struct dentry *dentry;
596
	struct inode *inode;
597
	struct sysfs_dirent *sd;
L
Linus Torvalds 已提交
598

599
	sysfs_addrm_start(&acxt, parent_sd);
600

601
	/* allocate */
602 603 604
	dentry = lookup_one_len(name, parent, strlen(name));
	if (IS_ERR(dentry)) {
		error = PTR_ERR(dentry);
605
		goto out_finish;
606 607 608
	}

	error = -EEXIST;
609
	if (dentry->d_inode)
610 611
		goto out_dput;

612
	error = -ENOMEM;
613
	sd = sysfs_new_dirent(name, mode, SYSFS_DIR);
614
	if (!sd)
615
		goto out_drop;
616
	sd->s_elem.dir.kobj = kobj;
617

618
	inode = sysfs_get_inode(sd);
619
	if (!inode)
620 621
		goto out_sput;

622 623 624 625 626 627
	if (inode->i_state & I_NEW) {
		inode->i_op = &sysfs_dir_inode_operations;
		inode->i_fop = &sysfs_dir_operations;
		/* directory inodes start off with i_nlink == 2 (for ".") */
		inc_nlink(inode);
	}
628 629 630

	/* link in */
	error = -EEXIST;
631
	if (sysfs_find_dirent(parent_sd, name))
632 633
		goto out_iput;

634 635
	sysfs_add_one(&acxt, sd);
	sysfs_link_sibling(sd);
636
	sysfs_instantiate(dentry, inode);
637
	sysfs_attach_dentry(sd, dentry);
638

639
	*p_sd = sd;
640
	error = 0;
641
	goto out_finish;	/* pin directory dentry in core */
642

643 644
 out_iput:
	iput(inode);
645 646 647 648 649 650
 out_sput:
	sysfs_put(sd);
 out_drop:
	d_drop(dentry);
 out_dput:
	dput(dentry);
651 652
 out_finish:
	sysfs_addrm_finish(&acxt);
L
Linus Torvalds 已提交
653 654 655
	return error;
}

656 657
int sysfs_create_subdir(struct kobject *kobj, const char *name,
			struct sysfs_dirent **p_sd)
L
Linus Torvalds 已提交
658
{
659
	return create_dir(kobj, kobj->sd, name, p_sd);
L
Linus Torvalds 已提交
660 661 662 663 664
}

/**
 *	sysfs_create_dir - create a directory for an object.
 *	@kobj:		object we're creating directory for. 
665
 *	@shadow_parent:	parent object.
L
Linus Torvalds 已提交
666
 */
667 668
int sysfs_create_dir(struct kobject *kobj,
		     struct sysfs_dirent *shadow_parent_sd)
L
Linus Torvalds 已提交
669
{
670
	struct sysfs_dirent *parent_sd, *sd;
L
Linus Torvalds 已提交
671 672 673 674
	int error = 0;

	BUG_ON(!kobj);

675 676
	if (shadow_parent_sd)
		parent_sd = shadow_parent_sd;
677
	else if (kobj->parent)
678
		parent_sd = kobj->parent->sd;
L
Linus Torvalds 已提交
679
	else if (sysfs_mount && sysfs_mount->mnt_sb)
680
		parent_sd = sysfs_mount->mnt_sb->s_root->d_fsdata;
L
Linus Torvalds 已提交
681 682 683
	else
		return -EFAULT;

684
	error = create_dir(kobj, parent_sd, kobject_name(kobj), &sd);
L
Linus Torvalds 已提交
685
	if (!error)
686
		kobj->sd = sd;
L
Linus Torvalds 已提交
687 688 689 690 691 692 693 694
	return error;
}

static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
				struct nameidata *nd)
{
	struct sysfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
	struct sysfs_dirent * sd;
695
	struct bin_attribute *bin_attr;
696 697
	struct inode *inode;
	int found = 0;
L
Linus Torvalds 已提交
698

699
	for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) {
700
		if ((sysfs_type(sd) & SYSFS_NOT_PINNED) &&
701 702
		    !strcmp(sd->s_name, dentry->d_name.name)) {
			found = 1;
L
Linus Torvalds 已提交
703 704 705 706
			break;
		}
	}

707 708 709 710 711
	/* no such entry */
	if (!found)
		return NULL;

	/* attach dentry and inode */
712
	inode = sysfs_get_inode(sd);
713 714 715
	if (!inode)
		return ERR_PTR(-ENOMEM);

716 717
	mutex_lock(&sysfs_mutex);

718 719
	if (inode->i_state & I_NEW) {
		/* initialize inode according to type */
720 721
		switch (sysfs_type(sd)) {
		case SYSFS_KOBJ_ATTR:
722 723
			inode->i_size = PAGE_SIZE;
			inode->i_fop = &sysfs_file_operations;
724 725 726
			break;
		case SYSFS_KOBJ_BIN_ATTR:
			bin_attr = sd->s_elem.bin_attr.bin_attr;
727 728
			inode->i_size = bin_attr->size;
			inode->i_fop = &bin_fops;
729 730
			break;
		case SYSFS_KOBJ_LINK:
731
			inode->i_op = &sysfs_symlink_inode_operations;
732 733 734 735
			break;
		default:
			BUG();
		}
736
	}
737 738 739 740

	sysfs_instantiate(dentry, inode);
	sysfs_attach_dentry(sd, dentry);

741 742
	mutex_unlock(&sysfs_mutex);

743
	return NULL;
L
Linus Torvalds 已提交
744 745
}

746
const struct inode_operations sysfs_dir_inode_operations = {
L
Linus Torvalds 已提交
747
	.lookup		= sysfs_lookup,
748
	.setattr	= sysfs_setattr,
L
Linus Torvalds 已提交
749 750
};

751
static void remove_dir(struct sysfs_dirent *sd)
L
Linus Torvalds 已提交
752
{
753
	struct sysfs_addrm_cxt acxt;
L
Linus Torvalds 已提交
754

755 756 757 758
	sysfs_addrm_start(&acxt, sd->s_parent);
	sysfs_unlink_sibling(sd);
	sysfs_remove_one(&acxt, sd);
	sysfs_addrm_finish(&acxt);
L
Linus Torvalds 已提交
759 760
}

761
/* Remove the subdirectory represented by @sd; wrapper around remove_dir(). */
void sysfs_remove_subdir(struct sysfs_dirent *sd)
{
	remove_dir(sd);
}


767
static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd)
L
Linus Torvalds 已提交
768
{
769
	struct sysfs_addrm_cxt acxt;
770
	struct sysfs_dirent **pos;
L
Linus Torvalds 已提交
771

772
	if (!dir_sd)
L
Linus Torvalds 已提交
773 774
		return;

775
	pr_debug("sysfs %s: removing dir\n", dir_sd->s_name);
776
	sysfs_addrm_start(&acxt, dir_sd);
777
	pos = &dir_sd->s_children;
778 779 780
	while (*pos) {
		struct sysfs_dirent *sd = *pos;

781
		if (sysfs_type(sd) && (sysfs_type(sd) & SYSFS_NOT_PINNED)) {
782
			*pos = sd->s_sibling;
783 784
			sd->s_sibling = NULL;
			sysfs_remove_one(&acxt, sd);
785 786
		} else
			pos = &(*pos)->s_sibling;
L
Linus Torvalds 已提交
787
	}
788
	sysfs_addrm_finish(&acxt);
789

790
	remove_dir(dir_sd);
791 792 793 794 795 796 797 798 799 800 801 802 803
}

/**
 *	sysfs_remove_dir - remove an object's directory.
 *	@kobj:	object.
 *
 *	The only thing special about this is that we remove any files in
 *	the directory before we remove the directory, and we've inlined
 *	what used to be sysfs_rmdir() below, instead of calling separately.
 */

void sysfs_remove_dir(struct kobject * kobj)
{
804
	struct sysfs_dirent *sd = kobj->sd;
805

T
Tejun Heo 已提交
806
	spin_lock(&sysfs_assoc_lock);
807
	kobj->sd = NULL;
T
Tejun Heo 已提交
808
	spin_unlock(&sysfs_assoc_lock);
809

810
	__sysfs_remove_dir(sd);
L
Linus Torvalds 已提交
811 812
}

813
int sysfs_rename_dir(struct kobject *kobj, struct sysfs_dirent *new_parent_sd,
814
		     const char *new_name)
L
Linus Torvalds 已提交
815
{
816 817
	struct sysfs_dirent *sd = kobj->sd;
	struct dentry *new_parent = new_parent_sd->s_dentry;
T
Tejun Heo 已提交
818 819
	struct dentry *new_dentry;
	char *dup_name;
820
	int error;
L
Linus Torvalds 已提交
821

822
	if (!new_parent_sd)
823
		return -EFAULT;
L
Linus Torvalds 已提交
824

825
	mutex_lock(&new_parent->d_inode->i_mutex);
L
Linus Torvalds 已提交
826

827
	new_dentry = lookup_one_len(new_name, new_parent, strlen(new_name));
828 829 830
	if (IS_ERR(new_dentry)) {
		error = PTR_ERR(new_dentry);
		goto out_unlock;
L
Linus Torvalds 已提交
831
	}
832 833 834 835 836 837

	/* By allowing two different directories with the same
	 * d_parent we allow this routine to move between different
	 * shadows of the same directory
	 */
	error = -EINVAL;
838
	if (sd->s_parent->s_dentry->d_inode != new_parent->d_inode ||
839
	    new_dentry->d_parent->d_inode != new_parent->d_inode ||
840
	    new_dentry == sd->s_dentry)
841 842 843 844 845 846
		goto out_dput;

	error = -EEXIST;
	if (new_dentry->d_inode)
		goto out_dput;

T
Tejun Heo 已提交
847 848 849 850 851 852
	/* rename kobject and sysfs_dirent */
	error = -ENOMEM;
	new_name = dup_name = kstrdup(new_name, GFP_KERNEL);
	if (!new_name)
		goto out_drop;

853 854
	error = kobject_set_name(kobj, "%s", new_name);
	if (error)
T
Tejun Heo 已提交
855
		goto out_free;
856

T
Tejun Heo 已提交
857 858 859 860
	kfree(sd->s_name);
	sd->s_name = new_name;

	/* move under the new parent */
861
	d_add(new_dentry, NULL);
862
	d_move(sd->s_dentry, new_dentry);
863

864 865
	mutex_lock(&sysfs_mutex);

866
	sysfs_unlink_sibling(sd);
867
	sysfs_get(new_parent_sd);
868
	sysfs_put(sd->s_parent);
869
	sd->s_parent = new_parent_sd;
870
	sysfs_link_sibling(sd);
871

872 873
	mutex_unlock(&sysfs_mutex);

874 875 876
	error = 0;
	goto out_unlock;

T
Tejun Heo 已提交
877 878
 out_free:
	kfree(dup_name);
879 880 881 882 883
 out_drop:
	d_drop(new_dentry);
 out_dput:
	dput(new_dentry);
 out_unlock:
884
	mutex_unlock(&new_parent->d_inode->i_mutex);
L
Linus Torvalds 已提交
885 886 887
	return error;
}

888 889 890 891 892 893 894
int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent)
{
	struct dentry *old_parent_dentry, *new_parent_dentry, *new_dentry;
	struct sysfs_dirent *new_parent_sd, *sd;
	int error;

	old_parent_dentry = kobj->parent ?
895
		kobj->parent->sd->s_dentry : sysfs_mount->mnt_sb->s_root;
896
	new_parent_dentry = new_parent ?
897
		new_parent->sd->s_dentry : sysfs_mount->mnt_sb->s_root;
898

M
Mark Lord 已提交
899 900
	if (old_parent_dentry->d_inode == new_parent_dentry->d_inode)
		return 0;	/* nothing to move */
901 902 903 904 905 906 907 908
again:
	mutex_lock(&old_parent_dentry->d_inode->i_mutex);
	if (!mutex_trylock(&new_parent_dentry->d_inode->i_mutex)) {
		mutex_unlock(&old_parent_dentry->d_inode->i_mutex);
		goto again;
	}

	new_parent_sd = new_parent_dentry->d_fsdata;
909
	sd = kobj->sd;
910 911 912 913 914 915 916 917 918

	new_dentry = lookup_one_len(kobj->name, new_parent_dentry,
				    strlen(kobj->name));
	if (IS_ERR(new_dentry)) {
		error = PTR_ERR(new_dentry);
		goto out;
	} else
		error = 0;
	d_add(new_dentry, NULL);
919
	d_move(sd->s_dentry, new_dentry);
920 921 922
	dput(new_dentry);

	/* Remove from old parent's list and insert into new parent's list. */
923 924
	mutex_lock(&sysfs_mutex);

925
	sysfs_unlink_sibling(sd);
926 927 928
	sysfs_get(new_parent_sd);
	sysfs_put(sd->s_parent);
	sd->s_parent = new_parent_sd;
929
	sysfs_link_sibling(sd);
930

931
	mutex_unlock(&sysfs_mutex);
932 933 934 935 936 937 938
out:
	mutex_unlock(&new_parent_dentry->d_inode->i_mutex);
	mutex_unlock(&old_parent_dentry->d_inode->i_mutex);

	return error;
}

L
Linus Torvalds 已提交
939 940
static int sysfs_dir_open(struct inode *inode, struct file *file)
{
941
	struct dentry * dentry = file->f_path.dentry;
L
Linus Torvalds 已提交
942
	struct sysfs_dirent * parent_sd = dentry->d_fsdata;
943
	struct sysfs_dirent * sd;
L
Linus Torvalds 已提交
944

945
	sd = sysfs_new_dirent("_DIR_", 0, 0);
946 947
	if (sd) {
		mutex_lock(&sysfs_mutex);
948 949
		sd->s_parent = sysfs_get(parent_sd);
		sysfs_link_sibling(sd);
950 951
		mutex_unlock(&sysfs_mutex);
	}
L
Linus Torvalds 已提交
952

953 954
	file->private_data = sd;
	return sd ? 0 : -ENOMEM;
L
Linus Torvalds 已提交
955 956 957 958 959 960
}

static int sysfs_dir_close(struct inode *inode, struct file *file)
{
	struct sysfs_dirent * cursor = file->private_data;

961
	mutex_lock(&sysfs_mutex);
962
	sysfs_unlink_sibling(cursor);
963
	mutex_unlock(&sysfs_mutex);
L
Linus Torvalds 已提交
964 965 966 967 968 969 970 971 972 973 974 975 976 977

	release_sysfs_dirent(cursor);

	return 0;
}

/*
 * Relationship between s_mode and the DT_xxx types: the value handed
 * to filldir is the 4-bit field of s_mode starting at bit 12.
 */
static inline unsigned char dt_type(struct sysfs_dirent *sd)
{
	return (unsigned char)((sd->s_mode >> 12) & 0xf);
}

/*
 * ->readdir for sysfs directories.
 *
 * Position 0 emits ".", position 1 emits ".." (the directory itself is
 * reported when it has no parent dirent).  From position 2 on the
 * children list is walked starting at the per-open cursor, skipping
 * entries whose sysfs_type() is zero.  The cursor is taken off the
 * list for the walk and put back where the walk stopped.
 *
 * Always returns 0.
 */
static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
{
	struct dentry *dentry = filp->f_path.dentry;
	struct sysfs_dirent * parent_sd = dentry->d_fsdata;
	struct sysfs_dirent *cursor = filp->private_data;
	struct sysfs_dirent **pos;
	ino_t ino;
	int i = filp->f_pos;

	switch (i) {
		case 0:
			ino = parent_sd->s_ino;
			if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
				break;
			filp->f_pos++;
			i++;
			/* fallthrough */
		case 1:
			if (parent_sd->s_parent)
				ino = parent_sd->s_parent->s_ino;
			else
				ino = parent_sd->s_ino;
			if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
				break;
			filp->f_pos++;
			i++;
			/* fallthrough */
		default:
			mutex_lock(&sysfs_mutex);

			/* find the link that points at the cursor */
			pos = &parent_sd->s_children;
			while (*pos != cursor)
				pos = &(*pos)->s_sibling;

			/* unlink cursor */
			*pos = cursor->s_sibling;

			/* first real entry: start from the list head */
			if (filp->f_pos == 2)
				pos = &parent_sd->s_children;

			for ( ; *pos; pos = &(*pos)->s_sibling) {
				struct sysfs_dirent *next = *pos;
				const char * name;
				int len;

				if (!sysfs_type(next))
					continue;

				name = next->s_name;
				len = strlen(name);
				ino = next->s_ino;

				if (filldir(dirent, name, len, filp->f_pos, ino,
						 dt_type(next)) < 0)
					break;

				filp->f_pos++;
			}

			/* put cursor back in */
			cursor->s_sibling = *pos;
			*pos = cursor;

			mutex_unlock(&sysfs_mutex);
	}
	return 0;
}

/*
 * ->llseek for sysfs directories.
 *
 * @origin 0 takes @offset as the new position, @origin 1 takes it
 * relative to the current position; any other origin or a negative
 * result yields -EINVAL.  When the position changes to 2 or beyond,
 * the per-open cursor is re-linked after (f_pos - 2) children with a
 * non-zero sysfs_type(), or at the end of the list if there are
 * fewer.
 *
 * Returns the new position or -EINVAL.
 */
static loff_t sysfs_dir_lseek(struct file * file, loff_t offset, int origin)
{
	struct dentry * dentry = file->f_path.dentry;

	switch (origin) {
		case 1:
			offset += file->f_pos;
			/* fallthrough */
		case 0:
			if (offset >= 0)
				break;
			/* fallthrough */
		default:
			return -EINVAL;
	}
	if (offset != file->f_pos) {
		mutex_lock(&sysfs_mutex);

		file->f_pos = offset;
		if (file->f_pos >= 2) {
			struct sysfs_dirent *sd = dentry->d_fsdata;
			struct sysfs_dirent *cursor = file->private_data;
			struct sysfs_dirent **pos;
			loff_t n = file->f_pos - 2;

			sysfs_unlink_sibling(cursor);

			/* skip n countable entries */
			pos = &sd->s_children;
			while (n && *pos) {
				struct sysfs_dirent *next = *pos;
				if (sysfs_type(next))
					n--;
				pos = &(*pos)->s_sibling;
			}

			/* re-insert the cursor at the position reached */
			cursor->s_sibling = *pos;
			*pos = cursor;
		}

		mutex_unlock(&sysfs_mutex);
	}

	return offset;
}

1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098

/**
 *	sysfs_make_shadowed_dir - Setup so a directory can be shadowed
 *	@kobj:	object we're creating shadow of.
 */

int sysfs_make_shadowed_dir(struct kobject *kobj,
	void * (*follow_link)(struct dentry *, struct nameidata *))
{
	struct inode *inode;
	struct inode_operations *i_op;

1099
	inode = kobj->sd->s_dentry->d_inode;
1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125
	if (inode->i_op != &sysfs_dir_inode_operations)
		return -EINVAL;

	i_op = kmalloc(sizeof(*i_op), GFP_KERNEL);
	if (!i_op)
		return -ENOMEM;

	memcpy(i_op, &sysfs_dir_inode_operations, sizeof(*i_op));
	i_op->follow_link = follow_link;

	/* Locking of inode->i_op?
	 * Since setting i_op is a single word write and they
	 * are atomic we should be ok here.
	 */
	inode->i_op = i_op;
	return 0;
}

/**
 *	sysfs_create_shadow_dir - create a shadow directory for an object.
 *	@kobj:	object we're creating directory for.
 *
 *	sysfs_make_shadowed_dir must already have been called on this
 *	directory.
 */

1126
struct sysfs_dirent *sysfs_create_shadow_dir(struct kobject *kobj)
1127
{
1128
	struct dentry *dir = kobj->sd->s_dentry;
T
Tejun Heo 已提交
1129 1130 1131 1132
	struct inode *inode = dir->d_inode;
	struct dentry *parent = dir->d_parent;
	struct sysfs_dirent *parent_sd = parent->d_fsdata;
	struct dentry *shadow;
1133
	struct sysfs_dirent *sd;
1134
	struct sysfs_addrm_cxt acxt;
1135

1136
	sd = ERR_PTR(-EINVAL);
1137 1138 1139 1140 1141 1142 1143
	if (!sysfs_is_shadowed_inode(inode))
		goto out;

	shadow = d_alloc(parent, &dir->d_name);
	if (!shadow)
		goto nomem;

1144
	sd = sysfs_new_dirent("_SHADOW_", inode->i_mode, SYSFS_DIR);
1145 1146
	if (!sd)
		goto nomem;
1147
	sd->s_elem.dir.kobj = kobj;
1148

1149 1150 1151 1152 1153 1154 1155
	sysfs_addrm_start(&acxt, parent_sd);

	/* add but don't link into children list */
	sysfs_add_one(&acxt, sd);

	/* attach and instantiate dentry */
	sysfs_attach_dentry(sd, shadow);
1156
	d_instantiate(shadow, igrab(inode));
1157 1158 1159
	inc_nlink(inode);	/* tj: synchronization? */

	sysfs_addrm_finish(&acxt);
1160 1161 1162 1163

	dget(shadow);		/* Extra count - pin the dentry in core */

out:
1164
	return sd;
1165 1166
nomem:
	dput(shadow);
1167
	sd = ERR_PTR(-ENOMEM);
1168 1169 1170 1171 1172
	goto out;
}

/**
 *	sysfs_remove_shadow_dir - remove a shadow directory.
 *	@shadow_sd: sysfs_dirent of shadow directory
 *
 *	The only thing special about this is that we remove any files in
 *	the directory before we remove the directory, and we've inlined
 *	what used to be sysfs_rmdir() below, instead of calling separately.
 */

void sysfs_remove_shadow_dir(struct sysfs_dirent *shadow_sd)
{
	__sysfs_remove_dir(shadow_sd);
}

1185
const struct file_operations sysfs_dir_operations = {
L
Linus Torvalds 已提交
1186 1187 1188 1189 1190 1191
	.open		= sysfs_dir_open,
	.release	= sysfs_dir_close,
	.llseek		= sysfs_dir_lseek,
	.read		= generic_read_dir,
	.readdir	= sysfs_readdir,
};