dir.c 25.7 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10
/*
 * dir.c - Operations for sysfs directories.
 */

#undef DEBUG

#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/module.h>
#include <linux/kobject.h>
11
#include <linux/namei.h>
12
#include <linux/idr.h>
13
#include <linux/completion.h>
14
#include <asm/semaphore.h>
L
Linus Torvalds 已提交
15 16
#include "sysfs.h"

17
DEFINE_MUTEX(sysfs_mutex);
T
Tejun Heo 已提交
18
spinlock_t sysfs_assoc_lock = SPIN_LOCK_UNLOCKED;
L
Linus Torvalds 已提交
19

20 21 22
static spinlock_t sysfs_ino_lock = SPIN_LOCK_UNLOCKED;
static DEFINE_IDA(sysfs_ino_ida);

23 24 25 26 27 28 29 30
/**
 *	sysfs_link_sibling - link sysfs_dirent into sibling list
 *	@sd: sysfs_dirent of interest
 *
 *	Link @sd into its sibling list which starts from
 *	sd->s_parent->s_children.
 *
 *	Locking:
31
 *	mutex_lock(sysfs_mutex)
32
 */
33
void sysfs_link_sibling(struct sysfs_dirent *sd)
34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
{
	struct sysfs_dirent *parent_sd = sd->s_parent;

	BUG_ON(sd->s_sibling);
	sd->s_sibling = parent_sd->s_children;
	parent_sd->s_children = sd;
}

/**
 *	sysfs_unlink_sibling - unlink sysfs_dirent from sibling list
 *	@sd: sysfs_dirent of interest
 *
 *	Unlink @sd from its sibling list which starts from
 *	sd->s_parent->s_children.
 *
 *	Locking:
50
 *	mutex_lock(sysfs_mutex)
51
 */
52
void sysfs_unlink_sibling(struct sysfs_dirent *sd)
53 54 55 56 57 58 59 60 61 62 63 64
{
	struct sysfs_dirent **pos;

	for (pos = &sd->s_parent->s_children; *pos; pos = &(*pos)->s_sibling) {
		if (*pos == sd) {
			*pos = sd->s_sibling;
			sd->s_sibling = NULL;
			break;
		}
	}
}

65 66 67 68 69 70 71 72 73 74 75 76
/**
 *	sysfs_get_active - try to take an active reference on a sysfs_dirent
 *	@sd: sysfs_dirent to pin, may be NULL
 *
 *	Atomically increment sd->s_active unless the counter is seen to
 *	be negative, in which case no reference is taken.  A NULL @sd
 *	is tolerated and simply yields NULL.
 *
 *	RETURNS:
 *	Pointer to @sd on success, NULL on failure.
 */
struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
{
	int cur, seen;

	if (unlikely(!sd))
		return NULL;

	for (;;) {
		cur = atomic_read(&sd->s_active);
		if (unlikely(cur < 0))
			return NULL;

		/* Only bump the counter if nobody changed it meanwhile. */
		seen = atomic_cmpxchg(&sd->s_active, cur, cur + 1);
		if (likely(seen == cur))
			return sd;
		if (seen < 0)
			return NULL;

		/* lost the race against another update, try again */
		cpu_relax();
	}
}

/**
 *	sysfs_put_active - drop an active reference on a sysfs_dirent
 *	@sd: sysfs_dirent to release, may be NULL
 *
 *	Decrement sd->s_active.  If the result equals
 *	SD_DEACTIVATED_BIAS, signal the completion whose address
 *	sysfs_deactivate() stored in sd->s_sibling.  A NULL @sd is
 *	ignored.
 */
void sysfs_put_active(struct sysfs_dirent *sd)
{
	struct completion *waiter;

	if (unlikely(!sd))
		return;

	if (likely(atomic_dec_return(&sd->s_active) != SD_DEACTIVATED_BIAS))
		return;

	/* atomic_dec_return() is a mb(), so the sd->s_sibling value
	 * read here is the updated one.
	 */
	waiter = (void *)sd->s_sibling;
	complete(waiter);
}

/**
 *	sysfs_get_active_two - pin a sysfs_dirent together with its parent
 *	@sd: sysfs_dirent of interest, may be NULL
 *
 *	Take an active reference on sd->s_parent (when there is one)
 *	and then on @sd itself.  If the second step fails the parent's
 *	reference is dropped again, so either both references are held
 *	or none.  A NULL @sd yields NULL.
 *
 *	RETURNS:
 *	Pointer to @sd on success, NULL on failure.
 */
struct sysfs_dirent *sysfs_get_active_two(struct sysfs_dirent *sd)
{
	if (!sd)
		return NULL;

	/* parent first ... */
	if (sd->s_parent && unlikely(!sysfs_get_active(sd->s_parent)))
		return NULL;

	/* ... then @sd, undoing the parent's reference on failure */
	if (unlikely(!sysfs_get_active(sd))) {
		sysfs_put_active(sd->s_parent);
		return NULL;
	}

	return sd;
}

/**
 *	sysfs_put_active_two - unpin a sysfs_dirent together with its parent
 *	@sd: sysfs_dirent of interest, may be NULL
 *
 *	Drop the active reference on @sd, then the one on sd->s_parent.
 *	A NULL @sd is ignored.
 */
void sysfs_put_active_two(struct sysfs_dirent *sd)
{
	if (!sd)
		return;

	sysfs_put_active(sd);
	sysfs_put_active(sd->s_parent);
}

/**
 *	sysfs_deactivate - deactivate sysfs_dirent
 *	@sd: sysfs_dirent to deactivate
 *
166
 *	Deny new active references and drain existing ones.
167
 */
168
static void sysfs_deactivate(struct sysfs_dirent *sd)
169
{
170 171
	DECLARE_COMPLETION_ONSTACK(wait);
	int v;
172

173
	BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED));
174
	sd->s_sibling = (void *)&wait;
175 176

	/* atomic_add_return() is a mb(), put_active() will always see
177
	 * the updated sd->s_sibling.
178
	 */
179 180 181 182 183
	v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active);

	if (v != SD_DEACTIVATED_BIAS)
		wait_for_completion(&wait);

184
	sd->s_sibling = NULL;
185 186
}

T
Tejun Heo 已提交
187
/**
 *	sysfs_alloc_ino - allocate an inode number for a sysfs_dirent
 *	@pino: where to store the new number; written only on success
 *
 *	Ask sysfs_ino_ida for an id of at least 2 while holding
 *	sysfs_ino_lock.  When the IDA reports -EAGAIN, it is refilled
 *	with ida_pre_get(GFP_KERNEL) outside the spinlock and the
 *	allocation is retried.
 *
 *	RETURNS:
 *	0 on success, -ENOMEM if the IDA could not be refilled, or any
 *	other error reported by ida_get_new_above().
 */
static int sysfs_alloc_ino(ino_t *pino)
{
	int ino, rc;

 retry:
	spin_lock(&sysfs_ino_lock);
	rc = ida_get_new_above(&sysfs_ino_ida, 2, &ino);
	spin_unlock(&sysfs_ino_lock);

	if (rc == -EAGAIN) {
		/* refill outside the spinlock, GFP_KERNEL may sleep */
		if (ida_pre_get(&sysfs_ino_ida, GFP_KERNEL))
			goto retry;
		rc = -ENOMEM;
	}

	/* @ino is only meaningful when the allocation succeeded; do not
	 * copy an uninitialized value out on the error paths.
	 */
	if (rc)
		return rc;

	*pino = ino;
	return 0;
}

/* Hand inode number @ino back to sysfs_ino_ida under sysfs_ino_lock. */
static void sysfs_free_ino(ino_t ino)
{
	spinlock_t *lock = &sysfs_ino_lock;

	spin_lock(lock);
	ida_remove(&sysfs_ino_ida, ino);
	spin_unlock(lock);
}

213 214
void release_sysfs_dirent(struct sysfs_dirent * sd)
{
T
Tejun Heo 已提交
215 216 217
	struct sysfs_dirent *parent_sd;

 repeat:
218 219 220
	/* Moving/renaming is always done while holding reference.
	 * sd->s_parent won't change beneath us.
	 */
T
Tejun Heo 已提交
221 222
	parent_sd = sd->s_parent;

223
	if (sysfs_type(sd) == SYSFS_KOBJ_LINK)
224
		sysfs_put(sd->s_elem.symlink.target_sd);
225
	if (sysfs_type(sd) & SYSFS_COPY_NAME)
T
Tejun Heo 已提交
226
		kfree(sd->s_name);
227
	kfree(sd->s_iattr);
228
	sysfs_free_ino(sd->s_ino);
229
	kmem_cache_free(sysfs_dir_cachep, sd);
T
Tejun Heo 已提交
230 231 232 233

	sd = parent_sd;
	if (sd && atomic_dec_and_test(&sd->s_count))
		goto repeat;
234 235
}

L
Linus Torvalds 已提交
236 237 238 239 240
static void sysfs_d_iput(struct dentry * dentry, struct inode * inode)
{
	struct sysfs_dirent * sd = dentry->d_fsdata;

	if (sd) {
T
Tejun Heo 已提交
241 242
		/* sd->s_dentry is protected with sysfs_assoc_lock.
		 * This allows sysfs_drop_dentry() to dereference it.
243
		 */
T
Tejun Heo 已提交
244
		spin_lock(&sysfs_assoc_lock);
245 246 247 248 249 250 251 252

		/* The dentry might have been deleted or another
		 * lookup could have happened updating sd->s_dentry to
		 * point the new dentry.  Ignore if it isn't pointing
		 * to this dentry.
		 */
		if (sd->s_dentry == dentry)
			sd->s_dentry = NULL;
T
Tejun Heo 已提交
253
		spin_unlock(&sysfs_assoc_lock);
L
Linus Torvalds 已提交
254 255 256 257 258 259 260 261 262
		sysfs_put(sd);
	}
	iput(inode);
}

/* dentry operations installed on sysfs dentries by sysfs_attach_dentry() */
static struct dentry_operations sysfs_dentry_ops = {
	.d_iput		= sysfs_d_iput,
};

263
/**
 *	sysfs_new_dirent - allocate and initialize a sysfs_dirent
 *	@name: name of the new entry
 *	@mode: value stored in sd->s_mode
 *	@type: value stored in sd->s_flags; if SYSFS_COPY_NAME is set
 *	       @name is duplicated with kstrdup(), otherwise the
 *	       caller's pointer is stored as is
 *
 *	The new sysfs_dirent gets a fresh inode number, s_count of 1,
 *	s_active of 0 and s_event of 1.  It has no parent and is not
 *	linked anywhere.
 *
 *	RETURNS:
 *	Pointer to the new sysfs_dirent, or NULL if any allocation
 *	failed (nothing is leaked in that case).
 */
struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
{
	char *dup_name = NULL;
	struct sysfs_dirent *sd;

	if (type & SYSFS_COPY_NAME) {
		name = dup_name = kstrdup(name, GFP_KERNEL);
		if (!name)
			return NULL;
	}

	sd = kmem_cache_zalloc(sysfs_dir_cachep, GFP_KERNEL);
	if (!sd)
		goto err_free_name;

	if (sysfs_alloc_ino(&sd->s_ino))
		goto err_free_sd;

	atomic_set(&sd->s_count, 1);
	atomic_set(&sd->s_active, 0);
	atomic_set(&sd->s_event, 1);

	sd->s_name = name;
	sd->s_mode = mode;
	sd->s_flags = type;

	return sd;

	/* Unwind in reverse order of acquisition.  kmem_cache_free()
	 * must only ever see a real object, so it has its own label;
	 * kfree(NULL) is fine.
	 */
 err_free_sd:
	kmem_cache_free(sysfs_dir_cachep, sd);
 err_free_name:
	kfree(dup_name);
	return NULL;
}

297 298 299 300 301 302 303 304 305 306 307
/**
 *	sysfs_attach_dentry - associate sysfs_dirent with dentry
 *	@sd: target sysfs_dirent
 *	@dentry: dentry to associate
 *
 *	Associate @sd with @dentry.  This is protected by
 *	sysfs_assoc_lock to avoid race with sysfs_d_iput().
 *
 *	LOCKING:
 *	mutex_lock(sysfs_mutex)
 */
308 309 310 311 312 313
static void sysfs_attach_dentry(struct sysfs_dirent *sd, struct dentry *dentry)
{
	dentry->d_op = &sysfs_dentry_ops;
	dentry->d_fsdata = sysfs_get(sd);

	/* protect sd->s_dentry against sysfs_d_iput */
T
Tejun Heo 已提交
314
	spin_lock(&sysfs_assoc_lock);
315
	sd->s_dentry = dentry;
T
Tejun Heo 已提交
316
	spin_unlock(&sysfs_assoc_lock);
317 318 319 320

	d_rehash(dentry);
}

321 322 323 324 325 326
/*
 * Match callback passed to ilookup5_nowait(): @arg is a sysfs_dirent,
 * and @inode matches when its i_ino equals that dirent's s_ino.
 */
static int sysfs_ilookup_test(struct inode *inode, void *arg)
{
	struct sysfs_dirent *wanted = arg;

	return wanted->s_ino == inode->i_ino;
}

327
/**
328 329 330
 *	sysfs_addrm_start - prepare for sysfs_dirent add/remove
 *	@acxt: pointer to sysfs_addrm_cxt to be used
 *	@parent_sd: parent sysfs_dirent
331
 *
332 333 334 335 336
 *	This function is called when the caller is about to add or
 *	remove sysfs_dirent under @parent_sd.  This function acquires
 *	sysfs_mutex, grabs inode for @parent_sd if available and lock
 *	i_mutex of it.  @acxt is used to keep and pass context to
 *	other addrm functions.
337 338
 *
 *	LOCKING:
339 340 341
 *	Kernel thread context (may sleep).  sysfs_mutex is locked on
 *	return.  i_mutex of parent inode is locked on return if
 *	available.
342
 */
343 344
void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt,
		       struct sysfs_dirent *parent_sd)
345
{
346
	struct inode *inode;
347


	memset(acxt, 0, sizeof(*acxt));
	acxt->parent_sd = parent_sd;

	/* Lookup parent inode.  inode initialization and I_NEW
	 * clearing are protected by sysfs_mutex.  By grabbing it and
	 * looking up with _nowait variant, inode state can be
	 * determined reliably.
	 */
	mutex_lock(&sysfs_mutex);

	inode = ilookup5_nowait(sysfs_sb, parent_sd->s_ino, sysfs_ilookup_test,
				parent_sd);

	if (inode && !(inode->i_state & I_NEW)) {
		/* parent inode available */
		acxt->parent_inode = inode;

		/* sysfs_mutex is below i_mutex in lock hierarchy.
		 * First, trylock i_mutex.  If fails, unlock
		 * sysfs_mutex and lock them in order.
		 */
		if (!mutex_trylock(&inode->i_mutex)) {
			mutex_unlock(&sysfs_mutex);
			mutex_lock(&inode->i_mutex);
			mutex_lock(&sysfs_mutex);
		}
	} else
		iput(inode);
}

/**
 *	sysfs_add_one - add sysfs_dirent to parent
 *	@acxt: addrm context to use
 *	@sd: sysfs_dirent to be added
 *
 *	Take a reference on @acxt->parent_sd and store it in
 *	sd->s_parent.  If @sd is a directory and the parent inode is
 *	held in @acxt, the parent inode's nlink is incremented.  @sd is
 *	NOT linked into the parent's children list; call
 *	sysfs_link_sibling() afterwards if that is wanted.
 *
 *	Must be called between sysfs_addrm_start() and
 *	sysfs_addrm_finish(), with the same @acxt.
 *
 *	LOCKING:
 *	Determined by sysfs_addrm_start().
 */
void sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
{
	struct inode *parent_inode = acxt->parent_inode;

	sd->s_parent = sysfs_get(acxt->parent_sd);

	if (sysfs_type(sd) == SYSFS_DIR && parent_inode)
		inc_nlink(parent_inode);

	/* counted so sysfs_addrm_finish() knows something changed */
	acxt->cnt++;
}

/**
 *	sysfs_remove_one - remove sysfs_dirent from parent
 *	@acxt: addrm context to use
 *	@sd: sysfs_dirent to be removed
 *
 *	Flag @sd with SYSFS_FLAG_REMOVED and queue it on @acxt->removed
 *	(chained through s_sibling) for sysfs_addrm_finish() to clean
 *	up.  If @sd is a directory and the parent inode is held in
 *	@acxt, the parent inode's nlink is dropped.  @sd is NOT unlinked
 *	from the parent's children list; the caller is responsible for
 *	doing that before calling this function.
 *
 *	Must be called between sysfs_addrm_start() and
 *	sysfs_addrm_finish(), with the same @acxt.
 *
 *	LOCKING:
 *	Determined by sysfs_addrm_start().
 */
void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
{
	struct inode *parent_inode = acxt->parent_inode;

	/* must be off the sibling list and not yet removed */
	BUG_ON(sd->s_sibling || (sd->s_flags & SYSFS_FLAG_REMOVED));

	sd->s_flags |= SYSFS_FLAG_REMOVED;

	/* push onto the context's removed list */
	sd->s_sibling = acxt->removed;
	acxt->removed = sd;

	if (sysfs_type(sd) == SYSFS_DIR && parent_inode)
		drop_nlink(parent_inode);

	acxt->cnt++;
}

/**
 *	sysfs_addrm_finish - finish up sysfs_dirent add/remove
 *	@acxt: addrm context to finish up
 *
 *	Release what sysfs_addrm_start() acquired: sysfs_mutex and, if a
 *	parent inode was grabbed, its i_mutex and reference.  The parent
 *	inode's ctime/mtime are updated when anything was added or
 *	removed.  Afterwards every sysfs_dirent queued by
 *	sysfs_remove_one() has its dentry dropped, is deactivated and
 *	has one reference put.
 *
 *	LOCKING:
 *	All mutexes acquired by sysfs_addrm_start() are released.
 *
 *	RETURNS:
 *	Number of added/removed sysfs_dirents since sysfs_addrm_start().
 */
int sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
{
	struct sysfs_dirent *victim;

	/* release resources acquired by sysfs_addrm_start() */
	mutex_unlock(&sysfs_mutex);
	if (acxt->parent_inode) {
		struct inode *parent_inode = acxt->parent_inode;

		/* if added/removed, update timestamps on the parent */
		if (acxt->cnt)
			parent_inode->i_ctime = parent_inode->i_mtime =
				CURRENT_TIME;

		mutex_unlock(&parent_inode->i_mutex);
		iput(parent_inode);
	}

	/* kill removed sysfs_dirents, popping them one at a time */
	while ((victim = acxt->removed) != NULL) {
		acxt->removed = victim->s_sibling;
		victim->s_sibling = NULL;

		sysfs_drop_dentry(victim);
		sysfs_deactivate(victim);
		sysfs_put(victim);
	}

	return acxt->cnt;
}

481 482 483 484 485 486
/**
 *	sysfs_find_dirent - find sysfs_dirent with the given name
 *	@parent_sd: sysfs_dirent to search under
 *	@name: name to look for
 *
 *	Look for sysfs_dirent with name @name under @parent_sd.
487
 *
488
 *	LOCKING:
489
 *	mutex_lock(sysfs_mutex)
490
 *
491 492
 *	RETURNS:
 *	Pointer to sysfs_dirent if found, NULL if not.
493
 */
494 495
struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
				       const unsigned char *name)
496
{
497 498 499 500 501 502 503
	struct sysfs_dirent *sd;

	for (sd = parent_sd->s_children; sd; sd = sd->s_sibling)
		if (sysfs_type(sd) && !strcmp(sd->s_name, name))
			return sd;
	return NULL;
}
504

505 506 507 508 509 510 511 512 513
/**
 *	sysfs_get_dirent - find and get sysfs_dirent with the given name
 *	@parent_sd: sysfs_dirent to search under
 *	@name: name to look for
 *
 *	Look for sysfs_dirent with name @name under @parent_sd and get
 *	it if found.
 *
 *	LOCKING:
514
 *	Kernel thread context (may sleep).  Grabs sysfs_mutex.
515 516 517 518 519 520 521 522 523
 *
 *	RETURNS:
 *	Pointer to sysfs_dirent if found, NULL if not.
 */
struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
				      const unsigned char *name)
{
	struct sysfs_dirent *sd;

524
	mutex_lock(&sysfs_mutex);
525 526
	sd = sysfs_find_dirent(parent_sd, name);
	sysfs_get(sd);
527
	mutex_unlock(&sysfs_mutex);
528 529

	return sd;
530 531
}

532 533
static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
		      const char *name, struct sysfs_dirent **p_sd)
L
Linus Torvalds 已提交
534
{
535
	struct dentry *parent = parent_sd->s_dentry;
536
	struct sysfs_addrm_cxt acxt;
L
Linus Torvalds 已提交
537 538
	int error;
	umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
539
	struct dentry *dentry;
540
	struct inode *inode;
541
	struct sysfs_dirent *sd;
L
Linus Torvalds 已提交
542

543
	sysfs_addrm_start(&acxt, parent_sd);
544

545
	/* allocate */
546 547 548
	dentry = lookup_one_len(name, parent, strlen(name));
	if (IS_ERR(dentry)) {
		error = PTR_ERR(dentry);
549
		goto out_finish;
550 551 552
	}

	error = -EEXIST;
553
	if (dentry->d_inode)
554 555
		goto out_dput;

556
	error = -ENOMEM;
557
	sd = sysfs_new_dirent(name, mode, SYSFS_DIR);
558
	if (!sd)
559
		goto out_drop;
560
	sd->s_elem.dir.kobj = kobj;
561

562
	inode = sysfs_get_inode(sd);
563
	if (!inode)
564 565
		goto out_sput;

566 567 568 569 570 571
	if (inode->i_state & I_NEW) {
		inode->i_op = &sysfs_dir_inode_operations;
		inode->i_fop = &sysfs_dir_operations;
		/* directory inodes start off with i_nlink == 2 (for ".") */
		inc_nlink(inode);
	}
572 573 574

	/* link in */
	error = -EEXIST;
575
	if (sysfs_find_dirent(parent_sd, name))
576 577
		goto out_iput;

578 579
	sysfs_add_one(&acxt, sd);
	sysfs_link_sibling(sd);
580
	sysfs_instantiate(dentry, inode);
581
	sysfs_attach_dentry(sd, dentry);
582

583
	*p_sd = sd;
584
	error = 0;
585
	goto out_finish;	/* pin directory dentry in core */
586

587 588
 out_iput:
	iput(inode);
589 590 591 592 593 594
 out_sput:
	sysfs_put(sd);
 out_drop:
	d_drop(dentry);
 out_dput:
	dput(dentry);
595 596
 out_finish:
	sysfs_addrm_finish(&acxt);
L
Linus Torvalds 已提交
597 598 599
	return error;
}

600 601
int sysfs_create_subdir(struct kobject *kobj, const char *name,
			struct sysfs_dirent **p_sd)
L
Linus Torvalds 已提交
602
{
603
	return create_dir(kobj, kobj->sd, name, p_sd);
L
Linus Torvalds 已提交
604 605 606 607 608
}

/**
 *	sysfs_create_dir - create a directory for an object.
 *	@kobj:		object we're creating directory for. 
609
 *	@shadow_parent:	parent object.
L
Linus Torvalds 已提交
610
 */
611 612
int sysfs_create_dir(struct kobject *kobj,
		     struct sysfs_dirent *shadow_parent_sd)
L
Linus Torvalds 已提交
613
{
614
	struct sysfs_dirent *parent_sd, *sd;
L
Linus Torvalds 已提交
615 616 617 618
	int error = 0;

	BUG_ON(!kobj);

619 620
	if (shadow_parent_sd)
		parent_sd = shadow_parent_sd;
621
	else if (kobj->parent)
622
		parent_sd = kobj->parent->sd;
L
Linus Torvalds 已提交
623
	else if (sysfs_mount && sysfs_mount->mnt_sb)
624
		parent_sd = sysfs_mount->mnt_sb->s_root->d_fsdata;
L
Linus Torvalds 已提交
625 626 627
	else
		return -EFAULT;

628
	error = create_dir(kobj, parent_sd, kobject_name(kobj), &sd);
L
Linus Torvalds 已提交
629
	if (!error)
630
		kobj->sd = sd;
L
Linus Torvalds 已提交
631 632 633 634 635 636 637 638
	return error;
}

static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
				struct nameidata *nd)
{
	struct sysfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
	struct sysfs_dirent * sd;
639
	struct bin_attribute *bin_attr;
640 641
	struct inode *inode;
	int found = 0;
L
Linus Torvalds 已提交
642

643
	for (sd = parent_sd->s_children; sd; sd = sd->s_sibling) {
644
		if ((sysfs_type(sd) & SYSFS_NOT_PINNED) &&
645 646
		    !strcmp(sd->s_name, dentry->d_name.name)) {
			found = 1;
L
Linus Torvalds 已提交
647 648 649 650
			break;
		}
	}

651 652 653 654 655
	/* no such entry */
	if (!found)
		return NULL;

	/* attach dentry and inode */
656
	inode = sysfs_get_inode(sd);
657 658 659
	if (!inode)
		return ERR_PTR(-ENOMEM);

660 661
	mutex_lock(&sysfs_mutex);

662 663
	if (inode->i_state & I_NEW) {
		/* initialize inode according to type */
664 665
		switch (sysfs_type(sd)) {
		case SYSFS_KOBJ_ATTR:
666 667
			inode->i_size = PAGE_SIZE;
			inode->i_fop = &sysfs_file_operations;
668 669 670
			break;
		case SYSFS_KOBJ_BIN_ATTR:
			bin_attr = sd->s_elem.bin_attr.bin_attr;
671 672
			inode->i_size = bin_attr->size;
			inode->i_fop = &bin_fops;
673 674
			break;
		case SYSFS_KOBJ_LINK:
675
			inode->i_op = &sysfs_symlink_inode_operations;
676 677 678 679
			break;
		default:
			BUG();
		}
680
	}
681 682 683 684

	sysfs_instantiate(dentry, inode);
	sysfs_attach_dentry(sd, dentry);

685 686
	mutex_unlock(&sysfs_mutex);

687
	return NULL;
L
Linus Torvalds 已提交
688 689
}

690
const struct inode_operations sysfs_dir_inode_operations = {
L
Linus Torvalds 已提交
691
	.lookup		= sysfs_lookup,
692
	.setattr	= sysfs_setattr,
L
Linus Torvalds 已提交
693 694
};

695
static void remove_dir(struct sysfs_dirent *sd)
L
Linus Torvalds 已提交
696
{
697
	struct sysfs_addrm_cxt acxt;
L
Linus Torvalds 已提交
698

699 700 701 702
	sysfs_addrm_start(&acxt, sd->s_parent);
	sysfs_unlink_sibling(sd);
	sysfs_remove_one(&acxt, sd);
	sysfs_addrm_finish(&acxt);
L
Linus Torvalds 已提交
703 704
}

705
/*
 * sysfs_remove_subdir - remove the directory represented by @sd.
 * Thin exported wrapper around remove_dir().
 */
void sysfs_remove_subdir(struct sysfs_dirent *sd)
{
	remove_dir(sd);
}


711
static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd)
L
Linus Torvalds 已提交
712
{
713
	struct sysfs_addrm_cxt acxt;
714
	struct sysfs_dirent **pos;
L
Linus Torvalds 已提交
715

716
	if (!dir_sd)
L
Linus Torvalds 已提交
717 718
		return;

719
	pr_debug("sysfs %s: removing dir\n", dir_sd->s_name);
720
	sysfs_addrm_start(&acxt, dir_sd);
721
	pos = &dir_sd->s_children;
722 723 724
	while (*pos) {
		struct sysfs_dirent *sd = *pos;

725
		if (sysfs_type(sd) && (sysfs_type(sd) & SYSFS_NOT_PINNED)) {
726
			*pos = sd->s_sibling;
727 728
			sd->s_sibling = NULL;
			sysfs_remove_one(&acxt, sd);
729 730
		} else
			pos = &(*pos)->s_sibling;
L
Linus Torvalds 已提交
731
	}
732
	sysfs_addrm_finish(&acxt);
733

734
	remove_dir(dir_sd);
735 736 737 738 739 740 741 742 743 744 745 746 747
}

/**
 *	sysfs_remove_dir - remove an object's directory.
 *	@kobj:	object.
 *
 *	The only thing special about this is that we remove any files in
 *	the directory before we remove the directory, and we've inlined
 *	what used to be sysfs_rmdir() below, instead of calling separately.
 */

void sysfs_remove_dir(struct kobject * kobj)
{
748
	struct sysfs_dirent *sd = kobj->sd;
749

T
Tejun Heo 已提交
750
	spin_lock(&sysfs_assoc_lock);
751
	kobj->sd = NULL;
T
Tejun Heo 已提交
752
	spin_unlock(&sysfs_assoc_lock);
753

754
	__sysfs_remove_dir(sd);
L
Linus Torvalds 已提交
755 756
}

757
int sysfs_rename_dir(struct kobject *kobj, struct sysfs_dirent *new_parent_sd,
758
		     const char *new_name)
L
Linus Torvalds 已提交
759
{
760 761
	struct sysfs_dirent *sd = kobj->sd;
	struct dentry *new_parent = new_parent_sd->s_dentry;
T
Tejun Heo 已提交
762 763
	struct dentry *new_dentry;
	char *dup_name;
764
	int error;
L
Linus Torvalds 已提交
765

766
	if (!new_parent_sd)
767
		return -EFAULT;
L
Linus Torvalds 已提交
768

769
	mutex_lock(&new_parent->d_inode->i_mutex);
L
Linus Torvalds 已提交
770

771
	new_dentry = lookup_one_len(new_name, new_parent, strlen(new_name));
772 773 774
	if (IS_ERR(new_dentry)) {
		error = PTR_ERR(new_dentry);
		goto out_unlock;
L
Linus Torvalds 已提交
775
	}
776 777 778 779 780 781

	/* By allowing two different directories with the same
	 * d_parent we allow this routine to move between different
	 * shadows of the same directory
	 */
	error = -EINVAL;
782
	if (sd->s_parent->s_dentry->d_inode != new_parent->d_inode ||
783
	    new_dentry->d_parent->d_inode != new_parent->d_inode ||
784
	    new_dentry == sd->s_dentry)
785 786 787 788 789 790
		goto out_dput;

	error = -EEXIST;
	if (new_dentry->d_inode)
		goto out_dput;

T
Tejun Heo 已提交
791 792 793 794 795 796
	/* rename kobject and sysfs_dirent */
	error = -ENOMEM;
	new_name = dup_name = kstrdup(new_name, GFP_KERNEL);
	if (!new_name)
		goto out_drop;

797 798
	error = kobject_set_name(kobj, "%s", new_name);
	if (error)
T
Tejun Heo 已提交
799
		goto out_free;
800

T
Tejun Heo 已提交
801 802 803 804
	kfree(sd->s_name);
	sd->s_name = new_name;

	/* move under the new parent */
805
	d_add(new_dentry, NULL);
806
	d_move(sd->s_dentry, new_dentry);
807

808 809
	mutex_lock(&sysfs_mutex);

810
	sysfs_unlink_sibling(sd);
811
	sysfs_get(new_parent_sd);
812
	sysfs_put(sd->s_parent);
813
	sd->s_parent = new_parent_sd;
814
	sysfs_link_sibling(sd);
815

816 817
	mutex_unlock(&sysfs_mutex);

818 819 820
	error = 0;
	goto out_unlock;

T
Tejun Heo 已提交
821 822
 out_free:
	kfree(dup_name);
823 824 825 826 827
 out_drop:
	d_drop(new_dentry);
 out_dput:
	dput(new_dentry);
 out_unlock:
828
	mutex_unlock(&new_parent->d_inode->i_mutex);
L
Linus Torvalds 已提交
829 830 831
	return error;
}

832 833 834 835 836 837 838
int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent)
{
	struct dentry *old_parent_dentry, *new_parent_dentry, *new_dentry;
	struct sysfs_dirent *new_parent_sd, *sd;
	int error;

	old_parent_dentry = kobj->parent ?
839
		kobj->parent->sd->s_dentry : sysfs_mount->mnt_sb->s_root;
840
	new_parent_dentry = new_parent ?
841
		new_parent->sd->s_dentry : sysfs_mount->mnt_sb->s_root;
842

M
Mark Lord 已提交
843 844
	if (old_parent_dentry->d_inode == new_parent_dentry->d_inode)
		return 0;	/* nothing to move */
845 846 847 848 849 850 851 852
again:
	mutex_lock(&old_parent_dentry->d_inode->i_mutex);
	if (!mutex_trylock(&new_parent_dentry->d_inode->i_mutex)) {
		mutex_unlock(&old_parent_dentry->d_inode->i_mutex);
		goto again;
	}

	new_parent_sd = new_parent_dentry->d_fsdata;
853
	sd = kobj->sd;
854 855 856 857 858 859 860 861 862

	new_dentry = lookup_one_len(kobj->name, new_parent_dentry,
				    strlen(kobj->name));
	if (IS_ERR(new_dentry)) {
		error = PTR_ERR(new_dentry);
		goto out;
	} else
		error = 0;
	d_add(new_dentry, NULL);
863
	d_move(sd->s_dentry, new_dentry);
864 865 866
	dput(new_dentry);

	/* Remove from old parent's list and insert into new parent's list. */
867 868
	mutex_lock(&sysfs_mutex);

869
	sysfs_unlink_sibling(sd);
870 871 872
	sysfs_get(new_parent_sd);
	sysfs_put(sd->s_parent);
	sd->s_parent = new_parent_sd;
873
	sysfs_link_sibling(sd);
874

875
	mutex_unlock(&sysfs_mutex);
876 877 878 879 880 881 882
out:
	mutex_unlock(&new_parent_dentry->d_inode->i_mutex);
	mutex_unlock(&old_parent_dentry->d_inode->i_mutex);

	return error;
}

L
Linus Torvalds 已提交
883 884
static int sysfs_dir_open(struct inode *inode, struct file *file)
{
885
	struct dentry * dentry = file->f_path.dentry;
L
Linus Torvalds 已提交
886
	struct sysfs_dirent * parent_sd = dentry->d_fsdata;
887
	struct sysfs_dirent * sd;
L
Linus Torvalds 已提交
888

889
	sd = sysfs_new_dirent("_DIR_", 0, 0);
890 891
	if (sd) {
		mutex_lock(&sysfs_mutex);
892 893
		sd->s_parent = sysfs_get(parent_sd);
		sysfs_link_sibling(sd);
894 895
		mutex_unlock(&sysfs_mutex);
	}
L
Linus Torvalds 已提交
896

897 898
	file->private_data = sd;
	return sd ? 0 : -ENOMEM;
L
Linus Torvalds 已提交
899 900 901 902 903 904
}

static int sysfs_dir_close(struct inode *inode, struct file *file)
{
	struct sysfs_dirent * cursor = file->private_data;

905
	mutex_lock(&sysfs_mutex);
906
	sysfs_unlink_sibling(cursor);
907
	mutex_unlock(&sysfs_mutex);
L
Linus Torvalds 已提交
908 909 910 911 912 913 914 915 916 917 918 919 920 921

	release_sysfs_dirent(cursor);

	return 0;
}

/*
 * Relationship between s_mode and the DT_xxx types: the value passed
 * to filldir() is the 4-bit field starting at bit 12 of s_mode.
 */
static inline unsigned char dt_type(struct sysfs_dirent *sd)
{
	return (sd->s_mode >> 12) & 0xf;
}

/*
 * file_operations.readdir for sysfs directories.
 *
 * Positions 0 and 1 emit "." and "..".  From position 2 on, the
 * children list is walked starting at the per-file cursor (or at the
 * list head when f_pos is exactly 2), emitting every entry with a
 * non-zero sysfs_type().  The cursor is reinserted where the walk
 * stopped so the next call resumes from there.  Always returns 0.
 */
static int sysfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
	struct sysfs_dirent *parent_sd = filp->f_path.dentry->d_fsdata;
	struct sysfs_dirent *cursor = filp->private_data;
	struct sysfs_dirent **pos;
	ino_t ino;
	int i = filp->f_pos;

	if (i == 0) {
		ino = parent_sd->s_ino;
		if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
			return 0;
		filp->f_pos++;
		i++;
	}

	if (i == 1) {
		/* a directory without parent is reported as its own ".." */
		if (parent_sd->s_parent)
			ino = parent_sd->s_parent->s_ino;
		else
			ino = parent_sd->s_ino;
		if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
			return 0;
		filp->f_pos++;
		i++;
	}

	mutex_lock(&sysfs_mutex);

	/* locate the link that points at the cursor ... */
	pos = &parent_sd->s_children;
	while (*pos != cursor)
		pos = &(*pos)->s_sibling;

	/* ... and unlink the cursor */
	*pos = cursor->s_sibling;

	/* at position 2, start from the head of the list */
	if (filp->f_pos == 2)
		pos = &parent_sd->s_children;

	while (*pos) {
		struct sysfs_dirent *next = *pos;

		/* entries with a zero type are not reported */
		if (sysfs_type(next)) {
			const char *name = next->s_name;
			int len = strlen(name);

			ino = next->s_ino;
			if (filldir(dirent, name, len, filp->f_pos, ino,
				    dt_type(next)) < 0)
				break;

			filp->f_pos++;
		}

		pos = &next->s_sibling;
	}

	/* put cursor back in */
	cursor->s_sibling = *pos;
	*pos = cursor;

	mutex_unlock(&sysfs_mutex);

	return 0;
}

/*
 * file_operations.llseek for sysfs directories.
 *
 * Accepts @origin 0 (absolute) and 1 (relative to f_pos); anything else,
 * or a negative resulting offset, yields -EINVAL.  When the position
 * changes to 2 or beyond, the per-file cursor is moved so that it sits
 * after (offset - 2) entries with a non-zero sysfs_type(), or at the end
 * of the list if there are fewer.  Returns the new offset.
 */
static loff_t sysfs_dir_lseek(struct file *file, loff_t offset, int origin)
{
	struct dentry *dentry = file->f_path.dentry;

	if (origin == 1)
		offset += file->f_pos;
	else if (origin != 0)
		return -EINVAL;

	if (offset < 0)
		return -EINVAL;

	if (offset == file->f_pos)
		return offset;

	mutex_lock(&sysfs_mutex);

	file->f_pos = offset;
	if (file->f_pos >= 2) {
		struct sysfs_dirent *dir_sd = dentry->d_fsdata;
		struct sysfs_dirent *cursor = file->private_data;
		struct sysfs_dirent **pos = &dir_sd->s_children;
		loff_t remaining = file->f_pos - 2;

		sysfs_unlink_sibling(cursor);

		/* skip @remaining countable entries */
		for ( ; remaining && *pos; pos = &(*pos)->s_sibling)
			if (sysfs_type(*pos))
				remaining--;

		/* reinsert the cursor at the new position */
		cursor->s_sibling = *pos;
		*pos = cursor;
	}

	mutex_unlock(&sysfs_mutex);

	return offset;
}

1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042

/**
 *	sysfs_make_shadowed_dir - Setup so a directory can be shadowed
 *	@kobj:	object we're creating shadow of.
 */

int sysfs_make_shadowed_dir(struct kobject *kobj,
	void * (*follow_link)(struct dentry *, struct nameidata *))
{
	struct inode *inode;
	struct inode_operations *i_op;

1043
	inode = kobj->sd->s_dentry->d_inode;
1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069
	if (inode->i_op != &sysfs_dir_inode_operations)
		return -EINVAL;

	i_op = kmalloc(sizeof(*i_op), GFP_KERNEL);
	if (!i_op)
		return -ENOMEM;

	memcpy(i_op, &sysfs_dir_inode_operations, sizeof(*i_op));
	i_op->follow_link = follow_link;

	/* Locking of inode->i_op?
	 * Since setting i_op is a single word write and they
	 * are atomic we should be ok here.
	 */
	inode->i_op = i_op;
	return 0;
}

/**
 *	sysfs_create_shadow_dir - create a shadow directory for an object.
 *	@kobj:	object we're creating directory for.
 *
 *	sysfs_make_shadowed_dir must already have been called on this
 *	directory.
 */

1070
struct sysfs_dirent *sysfs_create_shadow_dir(struct kobject *kobj)
1071
{
1072
	struct dentry *dir = kobj->sd->s_dentry;
T
Tejun Heo 已提交
1073 1074 1075 1076
	struct inode *inode = dir->d_inode;
	struct dentry *parent = dir->d_parent;
	struct sysfs_dirent *parent_sd = parent->d_fsdata;
	struct dentry *shadow;
1077
	struct sysfs_dirent *sd;
1078
	struct sysfs_addrm_cxt acxt;
1079

1080
	sd = ERR_PTR(-EINVAL);
1081 1082 1083 1084 1085 1086 1087
	if (!sysfs_is_shadowed_inode(inode))
		goto out;

	shadow = d_alloc(parent, &dir->d_name);
	if (!shadow)
		goto nomem;

1088
	sd = sysfs_new_dirent("_SHADOW_", inode->i_mode, SYSFS_DIR);
1089 1090
	if (!sd)
		goto nomem;
1091
	sd->s_elem.dir.kobj = kobj;
1092

1093 1094 1095 1096 1097 1098 1099
	sysfs_addrm_start(&acxt, parent_sd);

	/* add but don't link into children list */
	sysfs_add_one(&acxt, sd);

	/* attach and instantiate dentry */
	sysfs_attach_dentry(sd, shadow);
1100
	d_instantiate(shadow, igrab(inode));
1101 1102 1103
	inc_nlink(inode);	/* tj: synchronization? */

	sysfs_addrm_finish(&acxt);
1104 1105 1106 1107

	dget(shadow);		/* Extra count - pin the dentry in core */

out:
1108
	return sd;
1109 1110
nomem:
	dput(shadow);
1111
	sd = ERR_PTR(-ENOMEM);
1112 1113 1114 1115 1116
	goto out;
}

/**
 *	sysfs_remove_shadow_dir - remove a shadow directory.
 *	@shadow_sd: sysfs_dirent of shadow directory
 *
 *	Remove the removable entries inside the shadow directory and
 *	then the directory itself, via __sysfs_remove_dir().  A NULL
 *	@shadow_sd is ignored.
 */

void sysfs_remove_shadow_dir(struct sysfs_dirent *shadow_sd)
{
	struct sysfs_dirent *dir_sd = shadow_sd;

	__sysfs_remove_dir(dir_sd);
}

1129
const struct file_operations sysfs_dir_operations = {
L
Linus Torvalds 已提交
1130 1131 1132 1133 1134 1135
	.open		= sysfs_dir_open,
	.release	= sysfs_dir_close,
	.llseek		= sysfs_dir_lseek,
	.read		= generic_read_dir,
	.readdir	= sysfs_readdir,
};