file.c 31.5 KB
Newer Older
L
Linus Torvalds 已提交
1
/*
 * fs/sysfs/file.c - sysfs regular (text) file implementation
 *
 * Copyright (c) 2001-3 Patrick Mochel
 * Copyright (c) 2007 SUSE Linux Products GmbH
 * Copyright (c) 2007 Tejun Heo <teheo@suse.de>
 *
 * This file is released under the GPLv2.
 *
 * Please see Documentation/filesystems/sysfs.txt for more information.
 */

#include <linux/module.h>
#include <linux/kobject.h>
15
#include <linux/kallsyms.h>
16
#include <linux/slab.h>
M
Miklos Szeredi 已提交
17
#include <linux/fsnotify.h>
18
#include <linux/namei.h>
19
#include <linux/poll.h>
20
#include <linux/list.h>
21
#include <linux/mutex.h>
A
Andrew Morton 已提交
22
#include <linux/limits.h>
23
#include <linux/uaccess.h>
24
#include <linux/seq_file.h>
25
#include <linux/mm.h>
L
Linus Torvalds 已提交
26 27 28

#include "sysfs.h"

T
Tejun Heo 已提交
29
/*
 * There's one sysfs_open_file for each open file and one sysfs_open_dirent
 * for each sysfs_dirent with one or more open files.
 *
 * sysfs_dirent->s_attr.open points to sysfs_open_dirent.  s_attr.open is
 * protected by sysfs_open_dirent_lock.
 *
 * filp->private_data points to seq_file whose ->private points to
 * sysfs_open_file.  sysfs_open_files are chained at
 * sysfs_open_dirent->files, which is protected by sysfs_open_file_mutex.
 */
J
Jiri Slaby 已提交
40
/* Protects sysfs_dirent->s_attr.open. */
static DEFINE_SPINLOCK(sysfs_open_dirent_lock);
T
Tejun Heo 已提交
41
/* Protects the sysfs_open_dirent->files lists. */
static DEFINE_MUTEX(sysfs_open_file_mutex);
T
Tejun Heo 已提交
42 43 44

struct sysfs_open_dirent {
	atomic_t		refcnt;
45 46
	atomic_t		event;
	wait_queue_head_t	poll;
47
	struct list_head	files; /* goes through sysfs_open_file.list */
T
Tejun Heo 已提交
48 49
};

50 51 52 53 54
static struct sysfs_open_file *sysfs_of(struct file *file)
{
	return ((struct seq_file *)file->private_data)->private;
}

T
Tejun Heo 已提交
55 56 57 58 59 60
/*
 * Determine the kernfs_ops for the given sysfs_dirent.  This function must
 * be called while holding an active reference.
 */
static const struct kernfs_ops *kernfs_ops(struct sysfs_dirent *sd)
{
61
	if (sd->s_flags & SYSFS_FLAG_LOCKDEP)
T
Tejun Heo 已提交
62 63 64 65
		lockdep_assert_held(sd);
	return sd->s_attr.ops;
}

T
Tejun Heo 已提交
66 67 68 69 70 71
/*
 * Determine ktype->sysfs_ops for the given sysfs_dirent.  This function
 * must be called while holding an active reference.
 */
static const struct sysfs_ops *sysfs_file_ops(struct sysfs_dirent *sd)
{
72
	struct kobject *kobj = sd->s_parent->priv;
T
Tejun Heo 已提交
73

74
	if (sd->s_flags & SYSFS_FLAG_LOCKDEP)
75
		lockdep_assert_held(sd);
T
Tejun Heo 已提交
76 77 78
	return kobj->ktype ? kobj->ktype->sysfs_ops : NULL;
}

79 80 81 82
/*
 * Reads on sysfs are handled through seq_file, which takes care of hairy
 * details like buffering and seeking.  The following function pipes
 * sysfs_ops->show() result through seq_file.
L
Linus Torvalds 已提交
83
 */
84
static int sysfs_kf_seq_show(struct seq_file *sf, void *v)
L
Linus Torvalds 已提交
85
{
86
	struct sysfs_open_file *of = sf->private;
87
	struct kobject *kobj = of->sd->s_parent->priv;
88
	const struct sysfs_ops *ops = sysfs_file_ops(of->sd);
L
Linus Torvalds 已提交
89
	ssize_t count;
90
	char *buf;
L
Linus Torvalds 已提交
91

92 93 94 95 96 97
	/* acquire buffer and ensure that it's >= PAGE_SIZE */
	count = seq_get_buf(sf, &buf);
	if (count < PAGE_SIZE) {
		seq_commit(sf, -1);
		return 0;
	}
L
Linus Torvalds 已提交
98

99
	/*
100 101
	 * Invoke show().  Control may reach here via seq file lseek even
	 * if @ops->show() isn't implemented.
102
	 */
103
	if (ops->show) {
104
		count = ops->show(kobj, of->sd->priv, buf);
105 106 107
		if (count < 0)
			return count;
	}
108

109 110 111 112
	/*
	 * The code works fine with PAGE_SIZE return but it's likely to
	 * indicate truncated result or overflow in normal use cases.
	 */
113 114 115 116 117 118
	if (count >= (ssize_t)PAGE_SIZE) {
		print_symbol("fill_read_buffer: %s returned bad count\n",
			(unsigned long)ops->show);
		/* Try to struggle along */
		count = PAGE_SIZE - 1;
	}
119 120
	seq_commit(sf, count);
	return 0;
L
Linus Torvalds 已提交
121 122
}

123 124
/*
 * kernfs read callback for bin sysfs files.
 *
 * @of:    open file being read
 * @buf:   kernel buffer to fill
 * @count: maximum number of bytes to read
 * @pos:   file offset to read from
 *
 * When the inode has a non-zero size the request is clamped to it: reads
 * starting at or beyond the end return 0 without invoking the attribute's
 * ->read(), and reads crossing the end are shortened.
 *
 * Returns the value of bin_attribute->read(), 0 for an empty request or a
 * request at/after EOF, or -EIO if the attribute has no ->read().
 */
static ssize_t sysfs_kf_bin_read(struct sysfs_open_file *of, char *buf,
				 size_t count, loff_t pos)
{
	struct bin_attribute *battr = of->sd->priv;
	struct kobject *kobj = of->sd->s_parent->priv;
	loff_t size = file_inode(of->file)->i_size;

	if (!count)
		return 0;

	if (size) {
		/*
		 * Use >= rather than >: at pos == size the clamp below would
		 * reduce @count to zero and ->read() would be invoked with an
		 * empty request, which the !count check above is meant to
		 * prevent.  This also matches sysfs_kf_bin_write().
		 */
		if (pos >= size)
			return 0;
		if (pos + count > size)
			count = size - pos;
	}

	if (!battr->read)
		return -EIO;

	return battr->read(of->file, kobj, battr, buf, pos, count);
}

static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos)
{
	struct sysfs_open_file *of = sf->private;
149
	const struct kernfs_ops *ops;
150 151 152 153 154 155 156 157 158

	/*
	 * @of->mutex nests outside active ref and is just to ensure that
	 * the ops aren't called concurrently for the same open file.
	 */
	mutex_lock(&of->mutex);
	if (!sysfs_get_active(of->sd))
		return ERR_PTR(-ENODEV);

159 160 161 162 163 164 165 166 167 168
	ops = kernfs_ops(of->sd);
	if (ops->seq_start) {
		return ops->seq_start(sf, ppos);
	} else {
		/*
		 * The same behavior and code as single_open().  Returns
		 * !NULL if pos is at the beginning; otherwise, NULL.
		 */
		return NULL + !*ppos;
	}
169 170 171 172
}

static void *kernfs_seq_next(struct seq_file *sf, void *v, loff_t *ppos)
{
173 174 175 176 177 178 179 180 181 182 183 184 185
	struct sysfs_open_file *of = sf->private;
	const struct kernfs_ops *ops = kernfs_ops(of->sd);

	if (ops->seq_next) {
		return ops->seq_next(sf, v, ppos);
	} else {
		/*
		 * The same behavior and code as single_open(), always
		 * terminate after the initial read.
		 */
		++*ppos;
		return NULL;
	}
186 187 188 189 190
}

static void kernfs_seq_stop(struct seq_file *sf, void *v)
{
	struct sysfs_open_file *of = sf->private;
191 192 193 194
	const struct kernfs_ops *ops = kernfs_ops(of->sd);

	if (ops->seq_stop)
		ops->seq_stop(sf, v);
195 196 197 198 199 200 201 202 203 204 205

	sysfs_put_active(of->sd);
	mutex_unlock(&of->mutex);
}

static int kernfs_seq_show(struct seq_file *sf, void *v)
{
	struct sysfs_open_file *of = sf->private;

	of->event = atomic_read(&of->sd->s_attr.open->event);

T
Tejun Heo 已提交
206
	return of->sd->s_attr.ops->seq_show(sf, v);
207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226
}

/*
 * seq_file operations installed by kernfs_file_open() for files whose
 * kernfs_ops implement ->seq_show().
 */
static const struct seq_operations kernfs_seq_ops = {
	.start = kernfs_seq_start,
	.next = kernfs_seq_next,
	.stop = kernfs_seq_stop,
	.show = kernfs_seq_show,
};

/*
 * Read path for files that don't go through seq_file.
 *
 * As reading a bin file can have side-effects, the exact offset and bytes
 * specified in read(2) call should be passed to the read callback making
 * it difficult to use seq_file.  Implement simplistic custom buffering for
 * bin files: at most PAGE_SIZE bytes are read per call through a temporary
 * kernel buffer.
 *
 * Returns the number of bytes copied to @user_buf (and advances *@ppos by
 * that much) or a negative errno: -ENOMEM, -ENODEV if the dirent can't be
 * activated, -EINVAL if there is no ->read(), -EFAULT on copy failure, or
 * whatever ->read() returned.
 */
static ssize_t kernfs_file_direct_read(struct sysfs_open_file *of,
				       char __user *user_buf, size_t count,
				       loff_t *ppos)
{
	ssize_t len = min_t(size_t, count, PAGE_SIZE);
	const struct kernfs_ops *ops;
	char *buf;

	buf = kmalloc(len, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/*
	 * @of->mutex nests outside active ref and is just to ensure that
	 * the ops aren't called concurrently for the same open file.
	 */
	mutex_lock(&of->mutex);
	if (sysfs_get_active(of->sd)) {
		ops = kernfs_ops(of->sd);
		if (ops->read)
			len = ops->read(of, buf, len, *ppos);
		else
			len = -EINVAL;
		sysfs_put_active(of->sd);
	} else {
		len = -ENODEV;
	}
	mutex_unlock(&of->mutex);

	/* only touch userspace and the file position on success */
	if (len >= 0) {
		if (copy_to_user(user_buf, buf, len))
			len = -EFAULT;
		else
			*ppos += len;
	}

	kfree(buf);
	return len;
}

/**
 * kernfs_file_read - kernfs vfs read callback
 * @file: file pointer
 * @user_buf: userland buffer to read into
 * @count: number of bytes
 * @ppos: starting offset
 *
 * Files flagged with SYSFS_FLAG_HAS_SEQ_SHOW are read through seq_file;
 * everything else uses kernfs_file_direct_read().
 */
static ssize_t kernfs_file_read(struct file *file, char __user *user_buf,
				size_t count, loff_t *ppos)
{
	struct sysfs_open_file *of = sysfs_of(file);

	if (!(of->sd->s_flags & SYSFS_FLAG_HAS_SEQ_SHOW))
		return kernfs_file_direct_read(of, user_buf, count, ppos);

	return seq_read(file, user_buf, count, ppos);
}

287 288 289
/*
 * kernfs write callback for regular sysfs files: pass @buf to the owning
 * kobject's sysfs_ops->store().  Empty writes return 0 without calling
 * ->store().  @pos is not used.
 */
static ssize_t sysfs_kf_write(struct sysfs_open_file *of, char *buf,
			      size_t count, loff_t pos)
{
	struct sysfs_dirent *attr_sd = of->sd;
	const struct sysfs_ops *ops = sysfs_file_ops(attr_sd);
	struct kobject *kobj = attr_sd->s_parent->priv;

	if (count == 0)
		return 0;

	return ops->store(kobj, attr_sd->priv, buf, count);
}
299

300 301 302 303 304 305 306
/*
 * kernfs write callback for bin sysfs files.
 *
 * When the inode has a non-zero size, writes at or past the end return 0
 * and writes crossing the end are shortened.  Returns 0 for an empty
 * request, -EIO if the attribute has no ->write(), otherwise the result of
 * bin_attribute->write().
 */
static ssize_t sysfs_kf_bin_write(struct sysfs_open_file *of, char *buf,
				  size_t count, loff_t pos)
{
	struct bin_attribute *battr = of->sd->priv;
	struct kobject *kobj = of->sd->s_parent->priv;
	loff_t size = file_inode(of->file)->i_size;

	if (size) {
		if (pos >= size)
			return 0;
		count = min_t(ssize_t, count, size - pos);
	}

	if (count == 0)
		return 0;

	if (battr->write)
		return battr->write(of->file, kobj, battr, buf, pos, count);

	return -EIO;
}

/**
323
 * kernfs_file_write - kernfs vfs write callback
T
Tejun Heo 已提交
324 325 326 327 328
 * @file: file pointer
 * @user_buf: data to write
 * @count: number of bytes
 * @ppos: starting offset
 *
329 330
 * Copy data in from userland and pass it to the matching kernfs write
 * operation.
L
Linus Torvalds 已提交
331
 *
T
Tejun Heo 已提交
332 333 334 335 336
 * There is no easy way for us to know if userspace is only doing a partial
 * write, so we don't support them. We expect the entire buffer to come on
 * the first write.  Hint: if you're writing a value, first read the file,
 * modify only the the value you're changing, then write entire buffer
 * back.
L
Linus Torvalds 已提交
337
 */
338 339
static ssize_t kernfs_file_write(struct file *file, const char __user *user_buf,
				 size_t count, loff_t *ppos)
L
Linus Torvalds 已提交
340
{
341
	struct sysfs_open_file *of = sysfs_of(file);
342
	ssize_t len = min_t(size_t, count, PAGE_SIZE);
T
Tejun Heo 已提交
343
	const struct kernfs_ops *ops;
T
Tejun Heo 已提交
344
	char *buf;
L
Linus Torvalds 已提交
345

T
Tejun Heo 已提交
346 347 348 349 350 351 352 353 354 355
	buf = kmalloc(len + 1, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (copy_from_user(buf, user_buf, len)) {
		len = -EFAULT;
		goto out_free;
	}
	buf[len] = '\0';	/* guarantee string termination */

356 357 358 359 360 361 362 363 364 365 366
	/*
	 * @of->mutex nests outside active ref and is just to ensure that
	 * the ops aren't called concurrently for the same open file.
	 */
	mutex_lock(&of->mutex);
	if (!sysfs_get_active(of->sd)) {
		mutex_unlock(&of->mutex);
		len = -ENODEV;
		goto out_free;
	}

T
Tejun Heo 已提交
367 368 369
	ops = kernfs_ops(of->sd);
	if (ops->write)
		len = ops->write(of, buf, len, *ppos);
370
	else
T
Tejun Heo 已提交
371
		len = -EINVAL;
372 373 374 375

	sysfs_put_active(of->sd);
	mutex_unlock(&of->mutex);

L
Linus Torvalds 已提交
376 377
	if (len > 0)
		*ppos += len;
T
Tejun Heo 已提交
378 379
out_free:
	kfree(buf);
L
Linus Torvalds 已提交
380 381 382
	return len;
}

383 384 385 386 387 388 389 390 391 392 393 394 395
/*
 * kernfs mmap callback for bin sysfs files: forward to
 * bin_attribute->mmap(), or fail with -ENODEV if the attribute has none.
 */
static int sysfs_kf_bin_mmap(struct sysfs_open_file *of,
			     struct vm_area_struct *vma)
{
	struct bin_attribute *battr = of->sd->priv;
	struct kobject *kobj = of->sd->s_parent->priv;

	if (battr->mmap)
		return battr->mmap(of->file, kobj, battr, vma);

	return -ENODEV;
}

static void kernfs_vma_open(struct vm_area_struct *vma)
396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411
{
	struct file *file = vma->vm_file;
	struct sysfs_open_file *of = sysfs_of(file);

	if (!of->vm_ops)
		return;

	if (!sysfs_get_active(of->sd))
		return;

	if (of->vm_ops->open)
		of->vm_ops->open(vma);

	sysfs_put_active(of->sd);
}

412
static int kernfs_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431
{
	struct file *file = vma->vm_file;
	struct sysfs_open_file *of = sysfs_of(file);
	int ret;

	if (!of->vm_ops)
		return VM_FAULT_SIGBUS;

	if (!sysfs_get_active(of->sd))
		return VM_FAULT_SIGBUS;

	ret = VM_FAULT_SIGBUS;
	if (of->vm_ops->fault)
		ret = of->vm_ops->fault(vma, vmf);

	sysfs_put_active(of->sd);
	return ret;
}

432 433
static int kernfs_vma_page_mkwrite(struct vm_area_struct *vma,
				   struct vm_fault *vmf)
434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454
{
	struct file *file = vma->vm_file;
	struct sysfs_open_file *of = sysfs_of(file);
	int ret;

	if (!of->vm_ops)
		return VM_FAULT_SIGBUS;

	if (!sysfs_get_active(of->sd))
		return VM_FAULT_SIGBUS;

	ret = 0;
	if (of->vm_ops->page_mkwrite)
		ret = of->vm_ops->page_mkwrite(vma, vmf);
	else
		file_update_time(file);

	sysfs_put_active(of->sd);
	return ret;
}

455 456
static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr,
			     void *buf, int len, int write)
457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476
{
	struct file *file = vma->vm_file;
	struct sysfs_open_file *of = sysfs_of(file);
	int ret;

	if (!of->vm_ops)
		return -EINVAL;

	if (!sysfs_get_active(of->sd))
		return -EINVAL;

	ret = -EINVAL;
	if (of->vm_ops->access)
		ret = of->vm_ops->access(vma, addr, buf, len, write);

	sysfs_put_active(of->sd);
	return ret;
}

#ifdef CONFIG_NUMA
477 478
static int kernfs_vma_set_policy(struct vm_area_struct *vma,
				 struct mempolicy *new)
479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497
{
	struct file *file = vma->vm_file;
	struct sysfs_open_file *of = sysfs_of(file);
	int ret;

	if (!of->vm_ops)
		return 0;

	if (!sysfs_get_active(of->sd))
		return -EINVAL;

	ret = 0;
	if (of->vm_ops->set_policy)
		ret = of->vm_ops->set_policy(vma, new);

	sysfs_put_active(of->sd);
	return ret;
}

498 499
static struct mempolicy *kernfs_vma_get_policy(struct vm_area_struct *vma,
					       unsigned long addr)
500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518
{
	struct file *file = vma->vm_file;
	struct sysfs_open_file *of = sysfs_of(file);
	struct mempolicy *pol;

	if (!of->vm_ops)
		return vma->vm_policy;

	if (!sysfs_get_active(of->sd))
		return vma->vm_policy;

	pol = vma->vm_policy;
	if (of->vm_ops->get_policy)
		pol = of->vm_ops->get_policy(vma, addr);

	sysfs_put_active(of->sd);
	return pol;
}

519 520 521
static int kernfs_vma_migrate(struct vm_area_struct *vma,
			      const nodemask_t *from, const nodemask_t *to,
			      unsigned long flags)
522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541
{
	struct file *file = vma->vm_file;
	struct sysfs_open_file *of = sysfs_of(file);
	int ret;

	if (!of->vm_ops)
		return 0;

	if (!sysfs_get_active(of->sd))
		return 0;

	ret = 0;
	if (of->vm_ops->migrate)
		ret = of->vm_ops->migrate(vma, from, to, flags);

	sysfs_put_active(of->sd);
	return ret;
}
#endif

542 543 544 545 546
/*
 * vm_operations installed on a vma by kernfs_file_mmap(); each entry
 * wraps the corresponding operation saved in sysfs_open_file->vm_ops.
 */
static const struct vm_operations_struct kernfs_vm_ops = {
	.open		= kernfs_vma_open,
	.fault		= kernfs_vma_fault,
	.page_mkwrite	= kernfs_vma_page_mkwrite,
	.access		= kernfs_vma_access,
#ifdef CONFIG_NUMA
	.set_policy	= kernfs_vma_set_policy,
	.get_policy	= kernfs_vma_get_policy,
	.migrate	= kernfs_vma_migrate,
#endif
};

554
static int kernfs_file_mmap(struct file *file, struct vm_area_struct *vma)
555 556
{
	struct sysfs_open_file *of = sysfs_of(file);
T
Tejun Heo 已提交
557
	const struct kernfs_ops *ops;
558 559 560 561 562 563 564 565
	int rc;

	mutex_lock(&of->mutex);

	rc = -ENODEV;
	if (!sysfs_get_active(of->sd))
		goto out_unlock;

T
Tejun Heo 已提交
566 567 568
	ops = kernfs_ops(of->sd);
	if (ops->mmap)
		rc = ops->mmap(of, vma);
569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594
	if (rc)
		goto out_put;

	/*
	 * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
	 * to satisfy versions of X which crash if the mmap fails: that
	 * substitutes a new vm_file, and we don't then want bin_vm_ops.
	 */
	if (vma->vm_file != file)
		goto out_put;

	rc = -EINVAL;
	if (of->mmapped && of->vm_ops != vma->vm_ops)
		goto out_put;

	/*
	 * It is not possible to successfully wrap close.
	 * So error if someone is trying to use close.
	 */
	rc = -EINVAL;
	if (vma->vm_ops && vma->vm_ops->close)
		goto out_put;

	rc = 0;
	of->mmapped = 1;
	of->vm_ops = vma->vm_ops;
595
	vma->vm_ops = &kernfs_vm_ops;
596 597 598 599 600 601 602 603
out_put:
	sysfs_put_active(of->sd);
out_unlock:
	mutex_unlock(&of->mutex);

	return rc;
}

T
Tejun Heo 已提交
604 605 606
/**
 *	sysfs_get_open_dirent - get or create sysfs_open_dirent
 *	@sd: target sysfs_dirent
607
 *	@of: sysfs_open_file for this instance of open
T
Tejun Heo 已提交
608 609
 *
 *	If @sd->s_attr.open exists, increment its reference count;
610
 *	otherwise, create one.  @of is chained to the files list.
T
Tejun Heo 已提交
611 612 613 614 615 616 617 618
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, -errno on failure.
 */
static int sysfs_get_open_dirent(struct sysfs_dirent *sd,
619
				 struct sysfs_open_file *of)
T
Tejun Heo 已提交
620 621 622 623
{
	struct sysfs_open_dirent *od, *new_od = NULL;

 retry:
T
Tejun Heo 已提交
624
	mutex_lock(&sysfs_open_file_mutex);
625
	spin_lock_irq(&sysfs_open_dirent_lock);
T
Tejun Heo 已提交
626 627 628 629 630 631 632 633 634

	if (!sd->s_attr.open && new_od) {
		sd->s_attr.open = new_od;
		new_od = NULL;
	}

	od = sd->s_attr.open;
	if (od) {
		atomic_inc(&od->refcnt);
635
		list_add_tail(&of->list, &od->files);
T
Tejun Heo 已提交
636 637
	}

638
	spin_unlock_irq(&sysfs_open_dirent_lock);
T
Tejun Heo 已提交
639
	mutex_unlock(&sysfs_open_file_mutex);
T
Tejun Heo 已提交
640 641 642 643 644 645 646 647 648 649 650 651

	if (od) {
		kfree(new_od);
		return 0;
	}

	/* not there, initialize a new one and retry */
	new_od = kmalloc(sizeof(*new_od), GFP_KERNEL);
	if (!new_od)
		return -ENOMEM;

	atomic_set(&new_od->refcnt, 0);
652 653
	atomic_set(&new_od->event, 1);
	init_waitqueue_head(&new_od->poll);
654
	INIT_LIST_HEAD(&new_od->files);
T
Tejun Heo 已提交
655 656 657 658 659 660
	goto retry;
}

/**
 *	sysfs_put_open_dirent - put sysfs_open_dirent
 *	@sd: target sysfs_dirent
661
 *	@of: associated sysfs_open_file
T
Tejun Heo 已提交
662
 *
663 664
 *	Put @sd->s_attr.open and unlink @of from the files list.  If
 *	reference count reaches zero, disassociate and free it.
T
Tejun Heo 已提交
665 666 667 668 669
 *
 *	LOCKING:
 *	None.
 */
static void sysfs_put_open_dirent(struct sysfs_dirent *sd,
670
				  struct sysfs_open_file *of)
T
Tejun Heo 已提交
671 672
{
	struct sysfs_open_dirent *od = sd->s_attr.open;
673
	unsigned long flags;
T
Tejun Heo 已提交
674

T
Tejun Heo 已提交
675
	mutex_lock(&sysfs_open_file_mutex);
676
	spin_lock_irqsave(&sysfs_open_dirent_lock, flags);
T
Tejun Heo 已提交
677

678 679 680
	if (of)
		list_del(&of->list);

T
Tejun Heo 已提交
681 682 683 684 685
	if (atomic_dec_and_test(&od->refcnt))
		sd->s_attr.open = NULL;
	else
		od = NULL;

686
	spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags);
T
Tejun Heo 已提交
687
	mutex_unlock(&sysfs_open_file_mutex);
T
Tejun Heo 已提交
688 689 690 691

	kfree(od);
}

692
static int kernfs_file_open(struct inode *inode, struct file *file)
L
Linus Torvalds 已提交
693
{
694
	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
T
Tejun Heo 已提交
695
	const struct kernfs_ops *ops;
696
	struct sysfs_open_file *of;
697
	bool has_read, has_write, has_mmap;
698
	int error = -EACCES;
L
Linus Torvalds 已提交
699

700
	if (!sysfs_get_active(attr_sd))
701
		return -ENODEV;
L
Linus Torvalds 已提交
702

T
Tejun Heo 已提交
703
	ops = kernfs_ops(attr_sd);
L
Linus Torvalds 已提交
704

T
Tejun Heo 已提交
705 706 707
	has_read = ops->seq_show || ops->read || ops->mmap;
	has_write = ops->write || ops->mmap;
	has_mmap = ops->mmap;
L
Linus Torvalds 已提交
708

709 710 711 712 713 714 715 716 717
	/* check perms and supported operations */
	if ((file->f_mode & FMODE_WRITE) &&
	    (!(inode->i_mode & S_IWUGO) || !has_write))
		goto err_out;

	if ((file->f_mode & FMODE_READ) &&
	    (!(inode->i_mode & S_IRUGO) || !has_read))
		goto err_out;

718
	/* allocate a sysfs_open_file for the file */
719
	error = -ENOMEM;
720 721
	of = kzalloc(sizeof(struct sysfs_open_file), GFP_KERNEL);
	if (!of)
722
		goto err_out;
L
Linus Torvalds 已提交
723

724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740
	/*
	 * The following is done to give a different lockdep key to
	 * @of->mutex for files which implement mmap.  This is a rather
	 * crude way to avoid false positive lockdep warning around
	 * mm->mmap_sem - mmap nests @of->mutex under mm->mmap_sem and
	 * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under
	 * which mm->mmap_sem nests, while holding @of->mutex.  As each
	 * open file has a separate mutex, it's okay as long as those don't
	 * happen on the same file.  At this point, we can't easily give
	 * each file a separate locking class.  Let's differentiate on
	 * whether the file has mmap or not for now.
	 */
	if (has_mmap)
		mutex_init(&of->mutex);
	else
		mutex_init(&of->mutex);

741 742
	of->sd = attr_sd;
	of->file = file;
743 744

	/*
745 746 747
	 * Always instantiate seq_file even if read access doesn't use
	 * seq_file or is not requested.  This unifies private data access
	 * and readable regular files are the vast majority anyway.
748
	 */
T
Tejun Heo 已提交
749
	if (ops->seq_show)
750
		error = seq_open(file, &kernfs_seq_ops);
T
Tejun Heo 已提交
751 752
	else
		error = seq_open(file, NULL);
753 754 755
	if (error)
		goto err_free;

756 757
	((struct seq_file *)file->private_data)->private = of;

758 759 760
	/* seq_file clears PWRITE unconditionally, restore it if WRITE */
	if (file->f_mode & FMODE_WRITE)
		file->f_mode |= FMODE_PWRITE;
761

T
Tejun Heo 已提交
762
	/* make sure we have open dirent struct */
763
	error = sysfs_get_open_dirent(attr_sd, of);
T
Tejun Heo 已提交
764
	if (error)
765
		goto err_close;
T
Tejun Heo 已提交
766

767
	/* open succeeded, put active references */
768
	sysfs_put_active(attr_sd);
769 770
	return 0;

771
err_close:
772
	seq_release(inode, file);
773
err_free:
774
	kfree(of);
775
err_out:
776
	sysfs_put_active(attr_sd);
L
Linus Torvalds 已提交
777 778 779
	return error;
}

780
static int kernfs_file_release(struct inode *inode, struct file *filp)
L
Linus Torvalds 已提交
781
{
T
Tejun Heo 已提交
782
	struct sysfs_dirent *sd = filp->f_path.dentry->d_fsdata;
783
	struct sysfs_open_file *of = sysfs_of(filp);
L
Linus Torvalds 已提交
784

785
	sysfs_put_open_dirent(sd, of);
786
	seq_release(inode, filp);
787
	kfree(of);
788

L
Linus Torvalds 已提交
789 790 791
	return 0;
}

792 793 794 795 796
void sysfs_unmap_bin_file(struct sysfs_dirent *sd)
{
	struct sysfs_open_dirent *od;
	struct sysfs_open_file *of;

T
Tejun Heo 已提交
797
	if (!(sd->s_flags & SYSFS_FLAG_HAS_MMAP))
798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817
		return;

	spin_lock_irq(&sysfs_open_dirent_lock);
	od = sd->s_attr.open;
	if (od)
		atomic_inc(&od->refcnt);
	spin_unlock_irq(&sysfs_open_dirent_lock);
	if (!od)
		return;

	mutex_lock(&sysfs_open_file_mutex);
	list_for_each_entry(of, &od->files, list) {
		struct inode *inode = file_inode(of->file);
		unmap_mapping_range(inode->i_mapping, 0, 0, 1);
	}
	mutex_unlock(&sysfs_open_file_mutex);

	sysfs_put_open_dirent(sd, NULL);
}

818 819 820 821 822 823 824
/* Sysfs attribute files are pollable.  The idea is that you read
 * the content and then you use 'poll' or 'select' to wait for
 * the content to change.  When the content changes (assuming the
 * manager for the kobject supports notification), poll will
 * return POLLERR|POLLPRI, and select will return the fd whether
 * it is waiting for read, write, or exceptions.
 * Once poll/select indicates that the value has changed, you
825
 * need to close and re-open the file, or seek to 0 and read again.
826 827
 * Reminder: this only works for attributes which actively support
 * it, and it is not possible to test an attribute from userspace
828
 * to see if it supports poll (Neither 'poll' nor 'select' return
829 830
 * an appropriate error code).  When in doubt, set a suitable timeout value.
 */
831
static unsigned int kernfs_file_poll(struct file *filp, poll_table *wait)
832
{
833
	struct sysfs_open_file *of = sysfs_of(filp);
834
	struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata;
835
	struct sysfs_open_dirent *od = attr_sd->s_attr.open;
836 837

	/* need parent for the kobj, grab both */
838
	if (!sysfs_get_active(attr_sd))
839
		goto trigger;
840

841
	poll_wait(filp, &od->poll, wait);
842

843
	sysfs_put_active(attr_sd);
844

845
	if (of->event != atomic_read(&od->event))
846
		goto trigger;
847

848
	return DEFAULT_POLLMASK;
849 850

 trigger:
851
	return DEFAULT_POLLMASK|POLLERR|POLLPRI;
852 853
}

854 855 856 857 858 859 860
/**
 * kernfs_notify - notify a kernfs file
 * @sd: file to notify
 *
 * Notify @sd such that poll(2) on @sd wakes up.
 */
void kernfs_notify(struct sysfs_dirent *sd)
861 862
{
	struct sysfs_open_dirent *od;
863
	unsigned long flags;
864

865
	spin_lock_irqsave(&sysfs_open_dirent_lock, flags);
866

867 868 869 870 871 872
	if (!WARN_ON(sysfs_type(sd) != SYSFS_KOBJ_ATTR)) {
		od = sd->s_attr.open;
		if (od) {
			atomic_inc(&od->event);
			wake_up_interruptible(&od->poll);
		}
873 874
	}

875
	spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags);
876
}
877
EXPORT_SYMBOL_GPL(kernfs_notify);
878

879
void sysfs_notify(struct kobject *k, const char *dir, const char *attr)
880
{
881
	struct sysfs_dirent *sd = k->sd, *tmp;
882 883

	if (sd && dir)
884
		sd = kernfs_find_and_get(sd, dir);
885
	else
886
		kernfs_get(sd);
887 888

	if (sd && attr) {
889 890
		tmp = kernfs_find_and_get(sd, attr);
		kernfs_put(sd);
891 892
		sd = tmp;
	}
893

894 895
	if (sd) {
		kernfs_notify(sd);
896
		kernfs_put(sd);
897
	}
898 899 900
}
EXPORT_SYMBOL_GPL(sysfs_notify);

901
const struct file_operations kernfs_file_operations = {
902
	.read		= kernfs_file_read,
903
	.write		= kernfs_file_write,
904
	.llseek		= generic_file_llseek,
905
	.mmap		= kernfs_file_mmap,
906 907 908
	.open		= kernfs_file_open,
	.release	= kernfs_file_release,
	.poll		= kernfs_file_poll,
909 910
};

T
Tejun Heo 已提交
911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940
/*
 * kernfs_ops tables selected by sysfs_add_file_mode_ns() depending on
 * which callbacks the attribute provides.
 */

/* neither readable nor writable; used for regular and bin attributes */
static const struct kernfs_ops sysfs_file_kfops_empty = {
};

/* regular attribute with only ->show() */
static const struct kernfs_ops sysfs_file_kfops_ro = {
	.seq_show	= sysfs_kf_seq_show,
};

/* regular attribute with only ->store() */
static const struct kernfs_ops sysfs_file_kfops_wo = {
	.write		= sysfs_kf_write,
};

/* regular attribute with both ->show() and ->store() */
static const struct kernfs_ops sysfs_file_kfops_rw = {
	.seq_show	= sysfs_kf_seq_show,
	.write		= sysfs_kf_write,
};

/* bin attribute with only ->read() */
static const struct kernfs_ops sysfs_bin_kfops_ro = {
	.read		= sysfs_kf_bin_read,
};

/* bin attribute with only ->write() */
static const struct kernfs_ops sysfs_bin_kfops_wo = {
	.write		= sysfs_kf_bin_write,
};

/* bin attribute with both ->read() and ->write(), or with ->mmap() */
static const struct kernfs_ops sysfs_bin_kfops_rw = {
	.read		= sysfs_kf_bin_read,
	.write		= sysfs_kf_bin_write,
	.mmap		= sysfs_kf_bin_mmap,
};

941
int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd,
942
			   const struct attribute *attr, bool is_bin,
943
			   umode_t mode, const void *ns)
L
Linus Torvalds 已提交
944
{
945
	struct lock_class_key *key = NULL;
T
Tejun Heo 已提交
946
	const struct kernfs_ops *ops;
947
	struct sysfs_dirent *sd;
948
	loff_t size;
L
Linus Torvalds 已提交
949

950
	if (!is_bin) {
T
Tejun Heo 已提交
951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967
		struct kobject *kobj = dir_sd->priv;
		const struct sysfs_ops *sysfs_ops = kobj->ktype->sysfs_ops;

		/* every kobject with an attribute needs a ktype assigned */
		if (WARN(!sysfs_ops, KERN_ERR
			 "missing sysfs attribute operations for kobject: %s\n",
			 kobject_name(kobj)))
			return -EINVAL;

		if (sysfs_ops->show && sysfs_ops->store)
			ops = &sysfs_file_kfops_rw;
		else if (sysfs_ops->show)
			ops = &sysfs_file_kfops_ro;
		else if (sysfs_ops->store)
			ops = &sysfs_file_kfops_wo;
		else
			ops = &sysfs_file_kfops_empty;
968 969

		size = PAGE_SIZE;
T
Tejun Heo 已提交
970 971 972 973 974 975 976 977 978 979 980
	} else {
		struct bin_attribute *battr = (void *)attr;

		if ((battr->read && battr->write) || battr->mmap)
			ops = &sysfs_bin_kfops_rw;
		else if (battr->read)
			ops = &sysfs_bin_kfops_ro;
		else if (battr->write)
			ops = &sysfs_bin_kfops_wo;
		else
			ops = &sysfs_file_kfops_empty;
981 982

		size = battr->size;
T
Tejun Heo 已提交
983 984
	}

985 986 987 988 989 990
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	if (!attr->ignore_lockdep)
		key = attr->key ?: (struct lock_class_key *)&attr->skey;
#endif
	sd = kernfs_create_file_ns_key(dir_sd, attr->name, mode, size,
				       ops, (void *)attr, ns, key);
991 992 993 994 995 996 997 998 999
	if (IS_ERR(sd)) {
		if (PTR_ERR(sd) == -EEXIST)
			sysfs_warn_dup(dir_sd, attr->name);
		return PTR_ERR(sd);
	}
	return 0;
}

/**
1000
 * kernfs_create_file_ns_key - create a file
1001 1002 1003 1004 1005 1006 1007
 * @parent: directory to create the file in
 * @name: name of the file
 * @mode: mode of the file
 * @size: size of the file
 * @ops: kernfs operations for the file
 * @priv: private data for the file
 * @ns: optional namespace tag of the file
1008
 * @key: lockdep key for the file's active_ref, %NULL to disable lockdep
1009 1010 1011
 *
 * Returns the created node on success, ERR_PTR() value on error.
 */
1012 1013 1014 1015 1016 1017
struct sysfs_dirent *kernfs_create_file_ns_key(struct sysfs_dirent *parent,
					       const char *name,
					       umode_t mode, loff_t size,
					       const struct kernfs_ops *ops,
					       void *priv, const void *ns,
					       struct lock_class_key *key)
1018 1019 1020 1021 1022 1023 1024
{
	struct sysfs_addrm_cxt acxt;
	struct sysfs_dirent *sd;
	int rc;

	sd = sysfs_new_dirent(name, (mode & S_IALLUGO) | S_IFREG,
			      SYSFS_KOBJ_ATTR);
1025
	if (!sd)
1026
		return ERR_PTR(-ENOMEM);
1027

T
Tejun Heo 已提交
1028
	sd->s_attr.ops = ops;
1029
	sd->s_attr.size = size;
1030
	sd->s_ns = ns;
1031
	sd->priv = priv;
1032 1033 1034 1035 1036 1037 1038

#ifdef CONFIG_DEBUG_LOCK_ALLOC
	if (key) {
		lockdep_init_map(&sd->dep_map, "s_active", key, 0);
		sd->s_flags |= SYSFS_FLAG_LOCKDEP;
	}
#endif
L
Linus Torvalds 已提交
1039

T
Tejun Heo 已提交
1040 1041 1042 1043 1044 1045 1046 1047 1048 1049
	/*
	 * sd->s_attr.ops is accesible only while holding active ref.  We
	 * need to know whether some ops are implemented outside active
	 * ref.  Cache their existence in flags.
	 */
	if (ops->seq_show)
		sd->s_flags |= SYSFS_FLAG_HAS_SEQ_SHOW;
	if (ops->mmap)
		sd->s_flags |= SYSFS_FLAG_HAS_MMAP;

1050
	sysfs_addrm_start(&acxt);
T
Tejun Heo 已提交
1051
	rc = sysfs_add_one(&acxt, sd, parent);
1052
	sysfs_addrm_finish(&acxt);
1053

1054
	if (rc) {
1055
		kernfs_put(sd);
1056 1057 1058
		return ERR_PTR(rc);
	}
	return sd;
L
Linus Torvalds 已提交
1059 1060
}

1061
int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr,
1062
		   bool is_bin)
1063
{
1064
	return sysfs_add_file_mode_ns(dir_sd, attr, is_bin, attr->mode, NULL);
1065 1066
}

L
Linus Torvalds 已提交
1067
/**
1068 1069 1070 1071
 * sysfs_create_file_ns - create an attribute file for an object with custom ns
 * @kobj: object we're creating for
 * @attr: attribute descriptor
 * @ns: namespace the new file should belong to
L
Linus Torvalds 已提交
1072
 */
1073 1074
int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr,
			 const void *ns)
L
Linus Torvalds 已提交
1075
{
1076
	BUG_ON(!kobj || !kobj->sd || !attr);
L
Linus Torvalds 已提交
1077

1078
	return sysfs_add_file_mode_ns(kobj->sd, attr, false, attr->mode, ns);
L
Linus Torvalds 已提交
1079 1080

}
1081
EXPORT_SYMBOL_GPL(sysfs_create_file_ns);
L
Linus Torvalds 已提交
1082

1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094
/**
 * sysfs_create_files - create a NULL-terminated array of attribute files
 * @kobj: object we're creating for
 * @ptr: NULL-terminated array of attribute descriptors
 *
 * Creates the files in array order.  If one creation fails, the files
 * created earlier by this call are removed again and the error is
 * returned.  The entry that failed is not removed: it was never created
 * by this call, and if the failure was a name collision the existing file
 * belongs to someone else.
 *
 * Returns 0 on success or the error from the failed creation.
 */
int sysfs_create_files(struct kobject *kobj, const struct attribute **ptr)
{
	int err = 0;
	int i;

	for (i = 0; ptr[i]; i++) {
		err = sysfs_create_file(kobj, ptr[i]);
		if (err)
			break;	/* leave i at the failed entry */
	}

	/* Roll back entries [0, i) only; ptr[i] is the one that failed. */
	if (err)
		while (--i >= 0)
			sysfs_remove_file(kobj, ptr[i]);

	return err;
}
EXPORT_SYMBOL_GPL(sysfs_create_files);
L
Linus Torvalds 已提交
1096

1097 1098 1099 1100 1101 1102 1103 1104 1105
/**
 * sysfs_add_file_to_group - add an attribute file to a pre-existing group.
 * @kobj: object we're acting for.
 * @attr: attribute descriptor.
 * @group: group name.
 */
int sysfs_add_file_to_group(struct kobject *kobj,
		const struct attribute *attr, const char *group)
{
1106
	struct sysfs_dirent *dir_sd;
1107 1108
	int error;

1109 1110 1111 1112 1113 1114
	if (group) {
		dir_sd = kernfs_find_and_get(kobj->sd, group);
	} else {
		dir_sd = kobj->sd;
		kernfs_get(dir_sd);
	}
1115

1116 1117 1118
	if (!dir_sd)
		return -ENOENT;

1119
	error = sysfs_add_file(dir_sd, attr, false);
1120
	kernfs_put(dir_sd);
1121

1122 1123 1124 1125
	return error;
}
EXPORT_SYMBOL_GPL(sysfs_add_file_to_group);

1126 1127 1128 1129 1130 1131 1132
/**
 * sysfs_chmod_file - update the modified mode value on an object attribute.
 * @kobj: object we're acting for.
 * @attr: attribute descriptor.
 * @mode: file permissions.
 *
 */
1133
int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr,
A
Al Viro 已提交
1134
		     umode_t mode)
1135
{
1136
	struct sysfs_dirent *sd;
1137
	struct iattr newattrs;
1138 1139
	int rc;

1140
	sd = kernfs_find_and_get(kobj->sd, attr->name);
1141
	if (!sd)
1142
		return -ENOENT;
1143

1144
	newattrs.ia_mode = (mode & S_IALLUGO) | (sd->s_mode & ~S_IALLUGO);
1145
	newattrs.ia_valid = ATTR_MODE;
1146

1147 1148
	rc = kernfs_setattr(sd, &newattrs);

1149
	kernfs_put(sd);
1150
	return rc;
1151 1152 1153
}
EXPORT_SYMBOL_GPL(sysfs_chmod_file);

L
Linus Torvalds 已提交
1154
/**
1155 1156 1157 1158
 * sysfs_remove_file_ns - remove an object attribute with a custom ns tag
 * @kobj: object we're acting for
 * @attr: attribute descriptor
 * @ns: namespace tag of the file to remove
L
Linus Torvalds 已提交
1159
 *
1160
 * Hash the attribute name and namespace tag and kill the victim.
L
Linus Torvalds 已提交
1161
 */
1162 1163
void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr,
			  const void *ns)
L
Linus Torvalds 已提交
1164
{
1165
	struct sysfs_dirent *dir_sd = kobj->sd;
1166

1167
	kernfs_remove_by_name_ns(dir_sd, attr->name, ns);
L
Linus Torvalds 已提交
1168
}
1169
EXPORT_SYMBOL_GPL(sysfs_remove_file_ns);
L
Linus Torvalds 已提交
1170

1171
/*
 * Remove every attribute file listed in the NULL-terminated array @ptr
 * from @kobj, in array order.
 */
void sysfs_remove_files(struct kobject *kobj, const struct attribute **ptr)
{
	const struct attribute **cur = ptr;

	while (*cur) {
		sysfs_remove_file(kobj, *cur);
		cur++;
	}
}
EXPORT_SYMBOL_GPL(sysfs_remove_files);
L
Linus Torvalds 已提交
1178

1179 1180 1181 1182 1183 1184 1185 1186 1187
/**
 * sysfs_remove_file_from_group - remove an attribute file from a group.
 * @kobj: object we're acting for.
 * @attr: attribute descriptor.
 * @group: group name.
 */
void sysfs_remove_file_from_group(struct kobject *kobj,
		const struct attribute *attr, const char *group)
{
1188
	struct sysfs_dirent *dir_sd;
1189

1190 1191 1192 1193 1194 1195 1196
	if (group) {
		dir_sd = kernfs_find_and_get(kobj->sd, group);
	} else {
		dir_sd = kobj->sd;
		kernfs_get(dir_sd);
	}

1197
	if (dir_sd) {
1198
		kernfs_remove_by_name(dir_sd, attr->name);
1199
		kernfs_put(dir_sd);
1200 1201 1202 1203
	}
}
EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group);

1204 1205 1206 1207 1208 1209 1210 1211 1212 1213
/**
 *	sysfs_create_bin_file - create binary file for object.
 *	@kobj:	object.
 *	@attr:	attribute descriptor.
 */
int sysfs_create_bin_file(struct kobject *kobj,
			  const struct bin_attribute *attr)
{
	BUG_ON(!kobj || !kobj->sd || !attr);

1214
	return sysfs_add_file(kobj->sd, &attr->attr, true);
1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225
}
EXPORT_SYMBOL_GPL(sysfs_create_bin_file);

/**
 *	sysfs_remove_bin_file - remove binary file for object.
 *	@kobj:	object.
 *	@attr:	attribute descriptor.
 */
void sysfs_remove_bin_file(struct kobject *kobj,
			   const struct bin_attribute *attr)
{
1226
	kernfs_remove_by_name(kobj->sd, attr->attr.name);
1227 1228 1229
}
EXPORT_SYMBOL_GPL(sysfs_remove_bin_file);

1230
struct sysfs_schedule_callback_struct {
1231 1232
	struct list_head	workq_list;
	struct kobject		*kobj;
1233 1234
	void			(*func)(void *);
	void			*data;
1235
	struct module		*owner;
1236 1237 1238
	struct work_struct	work;
};

1239
/* Workqueue running the deferred callbacks; created on first use. */
static struct workqueue_struct *sysfs_workqueue;
/* Protects the sysfs_workq list. */
static DEFINE_MUTEX(sysfs_workq_mutex);
/* Callback requests that have been scheduled and not yet completed. */
static LIST_HEAD(sysfs_workq);
1242 1243 1244 1245 1246 1247 1248
/*
 * Work function for a request queued by sysfs_schedule_callback(): run
 * the callback, release the kobject and module references taken when the
 * request was scheduled, unlink the request from sysfs_workq and free it.
 */
static void sysfs_schedule_callback_work(struct work_struct *work)
{
	struct sysfs_schedule_callback_struct *req;

	req = container_of(work, struct sysfs_schedule_callback_struct, work);

	req->func(req->data);
	kobject_put(req->kobj);
	module_put(req->owner);

	mutex_lock(&sysfs_workq_mutex);
	list_del(&req->workq_list);
	mutex_unlock(&sysfs_workq_mutex);

	kfree(req);
}

/**
 * sysfs_schedule_callback - helper to schedule a callback for a kobject
 * @kobj: object we're acting for.
 * @func: callback function to invoke later.
 * @data: argument to pass to @func.
1261
 * @owner: module owning the callback code
1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273
 *
 * sysfs attribute methods must not unregister themselves or their parent
 * kobject (which would amount to the same thing).  Attempts to do so will
 * deadlock, since unregistration is mutually exclusive with driver
 * callbacks.
 *
 * Instead methods can call this routine, which will attempt to allocate
 * and schedule a workqueue request to call back @func with @data as its
 * argument in the workqueue's process context.  @kobj will be pinned
 * until @func returns.
 *
 * Returns 0 if the request was submitted, -ENOMEM if storage could not
1274 1275
 * be allocated, -ENODEV if a reference to @owner isn't available,
 * -EAGAIN if a callback has already been scheduled for @kobj.
1276 1277
 */
int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *),
1278
		void *data, struct module *owner)
1279
{
1280
	struct sysfs_schedule_callback_struct *ss, *tmp;
1281

1282 1283
	if (!try_module_get(owner))
		return -ENODEV;
1284 1285 1286 1287

	mutex_lock(&sysfs_workq_mutex);
	list_for_each_entry_safe(ss, tmp, &sysfs_workq, workq_list)
		if (ss->kobj == kobj) {
1288
			module_put(owner);
1289 1290 1291 1292 1293
			mutex_unlock(&sysfs_workq_mutex);
			return -EAGAIN;
		}
	mutex_unlock(&sysfs_workq_mutex);

1294
	if (sysfs_workqueue == NULL) {
1295
		sysfs_workqueue = create_singlethread_workqueue("sysfsd");
1296 1297 1298 1299 1300 1301
		if (sysfs_workqueue == NULL) {
			module_put(owner);
			return -ENOMEM;
		}
	}

1302
	ss = kmalloc(sizeof(*ss), GFP_KERNEL);
1303 1304
	if (!ss) {
		module_put(owner);
1305
		return -ENOMEM;
1306
	}
1307 1308 1309 1310
	kobject_get(kobj);
	ss->kobj = kobj;
	ss->func = func;
	ss->data = data;
1311
	ss->owner = owner;
1312
	INIT_WORK(&ss->work, sysfs_schedule_callback_work);
1313 1314 1315 1316
	INIT_LIST_HEAD(&ss->workq_list);
	mutex_lock(&sysfs_workq_mutex);
	list_add_tail(&ss->workq_list, &sysfs_workq);
	mutex_unlock(&sysfs_workq_mutex);
1317
	queue_work(sysfs_workqueue, &ss->work);
1318 1319 1320
	return 0;
}
EXPORT_SYMBOL_GPL(sysfs_schedule_callback);