pipe.c 28.2 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
/*
 *  linux/fs/pipe.c
 *
 *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
 */

#include <linux/mm.h>
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
14
#include <linux/log2.h>
L
Linus Torvalds 已提交
15 16 17 18
#include <linux/mount.h>
#include <linux/pipe_fs_i.h>
#include <linux/uio.h>
#include <linux/highmem.h>
19
#include <linux/pagemap.h>
A
Al Viro 已提交
20
#include <linux/audit.h>
21
#include <linux/syscalls.h>
22
#include <linux/fcntl.h>
L
Linus Torvalds 已提交
23 24 25 26

#include <asm/uaccess.h>
#include <asm/ioctls.h>

27 28
/*
 * The max size that a non-root user is allowed to grow the pipe. Can
 * be set by root in /proc/sys/fs/pipe-max-size
 */
31 32 33 34 35 36
/* Initial value of the cap: 1048576 == 1 << 20. */
unsigned int pipe_max_size = 1048576;

/*
 * Minimum pipe size, as required by POSIX. Initialised to one page
 * (PAGE_SIZE).
 */
unsigned int pipe_min_size = PAGE_SIZE;
37

L
Linus Torvalds 已提交
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52
/*
 * We use a start+len construction, which provides full use of the 
 * allocated memory.
 * -- Florian Coosmann (FGC)
 * 
 * Reads with count = 0 should always return 0.
 * -- Julian Bradfield 1999-06-07.
 *
 * FIFOs and Pipes now generate SIGIO for both readers and writers.
 * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
 *
 * pipe_read & write cleanup
 * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
 */

53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83
static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
{
	if (pipe->inode)
		mutex_lock_nested(&pipe->inode->i_mutex, subclass);
}

void pipe_lock(struct pipe_inode_info *pipe)
{
	/*
	 * pipe_lock() nests non-pipe inode locks (for writing to a file)
	 */
	pipe_lock_nested(pipe, I_MUTEX_PARENT);
}
EXPORT_SYMBOL(pipe_lock);

void pipe_unlock(struct pipe_inode_info *pipe)
{
	if (pipe->inode)
		mutex_unlock(&pipe->inode->i_mutex);
}
EXPORT_SYMBOL(pipe_unlock);

/*
 * Lock two distinct pipes. Whichever pipe has the lower address is locked
 * first (subclass I_MUTEX_PARENT) and the other one second (subclass
 * I_MUTEX_CHILD), so the locking order does not depend on the order of the
 * arguments. Passing the same pipe twice is a BUG.
 */
void pipe_double_lock(struct pipe_inode_info *pipe1,
		      struct pipe_inode_info *pipe2)
{
	struct pipe_inode_info *first, *second;

	BUG_ON(pipe1 == pipe2);

	first = pipe1 < pipe2 ? pipe1 : pipe2;
	second = pipe1 < pipe2 ? pipe2 : pipe1;

	pipe_lock_nested(first, I_MUTEX_PARENT);
	pipe_lock_nested(second, I_MUTEX_CHILD);
}

L
Linus Torvalds 已提交
89
/* Drop the inode semaphore and wait for a pipe event, atomically */
90
void pipe_wait(struct pipe_inode_info *pipe)
L
Linus Torvalds 已提交
91 92 93
{
	DEFINE_WAIT(wait);

I
Ingo Molnar 已提交
94 95 96 97
	/*
	 * Pipes are system-local resources, so sleeping on them
	 * is considered a noninteractive wait:
	 */
98
	prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE);
99
	pipe_unlock(pipe);
L
Linus Torvalds 已提交
100
	schedule();
101
	finish_wait(&pipe->wait, &wait);
102
	pipe_lock(pipe);
L
Linus Torvalds 已提交
103 104
}

105
static int
106 107
pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len,
			int atomic)
L
Linus Torvalds 已提交
108 109 110 111 112 113 114 115
{
	unsigned long copy;

	while (len > 0) {
		while (!iov->iov_len)
			iov++;
		copy = min_t(unsigned long, len, iov->iov_len);

116 117 118 119 120 121 122
		if (atomic) {
			if (__copy_from_user_inatomic(to, iov->iov_base, copy))
				return -EFAULT;
		} else {
			if (copy_from_user(to, iov->iov_base, copy))
				return -EFAULT;
		}
L
Linus Torvalds 已提交
123 124 125 126 127 128 129 130
		to += copy;
		len -= copy;
		iov->iov_base += copy;
		iov->iov_len -= copy;
	}
	return 0;
}

131
static int
132 133
pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len,
		      int atomic)
L
Linus Torvalds 已提交
134 135 136 137 138 139 140 141
{
	unsigned long copy;

	while (len > 0) {
		while (!iov->iov_len)
			iov++;
		copy = min_t(unsigned long, len, iov->iov_len);

142 143 144 145 146 147 148
		if (atomic) {
			if (__copy_to_user_inatomic(iov->iov_base, from, copy))
				return -EFAULT;
		} else {
			if (copy_to_user(iov->iov_base, from, copy))
				return -EFAULT;
		}
L
Linus Torvalds 已提交
149 150 151 152 153 154 155 156
		from += copy;
		len -= copy;
		iov->iov_base += copy;
		iov->iov_len -= copy;
	}
	return 0;
}

157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
/*
 * Try to pre-fault the user memory behind @iov for writing, so that atomic
 * copies can be used afterwards. Leading empty segments are skipped, then
 * segments are faulted in one at a time until @len bytes are covered or
 * fault_in_pages_writeable() reports a failure.
 *
 * Returns the number of bytes not faulted in (0 when all of @len was
 * covered).
 */
static int iov_fault_in_pages_write(struct iovec *iov, unsigned long len)
{
	unsigned long chunk;

	while (!iov->iov_len)
		iov++;

	for (; len > 0; len -= chunk, iov++) {
		chunk = min_t(unsigned long, len, iov->iov_len);
		if (fault_in_pages_writeable(iov->iov_base, chunk))
			break;
	}

	return len;
}

/*
 * Pre-fault the user memory behind @iov for reading, so that atomic copies
 * can be used afterwards. Leading empty segments are skipped; the result of
 * fault_in_pages_readable() is ignored.
 */
static void iov_fault_in_pages_read(struct iovec *iov, unsigned long len)
{
	unsigned long chunk;

	while (!iov->iov_len)
		iov++;

	for (; len > 0; len -= chunk, iov++) {
		chunk = min_t(unsigned long, len, iov->iov_len);
		fault_in_pages_readable(iov->iov_base, chunk);
	}
}

198 199
static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
				  struct pipe_buffer *buf)
L
Linus Torvalds 已提交
200 201 202
{
	struct page *page = buf->page;

203 204 205
	/*
	 * If nobody else uses this page, and we don't already have a
	 * temporary page, let's keep track of it as a one-deep
206
	 * allocation cache. (Otherwise just release our reference to it)
207
	 */
208
	if (page_count(page) == 1 && !pipe->tmp_page)
209
		pipe->tmp_page = page;
210 211
	else
		page_cache_release(page);
L
Linus Torvalds 已提交
212 213
}

214 215 216 217 218 219 220 221
/**
 * generic_pipe_buf_map - virtually map a pipe buffer
 * @pipe:	the pipe that the buffer belongs to
 * @buf:	the buffer that should be mapped
 * @atomic:	whether to use an atomic map
 *
 * Description:
 *	This function returns a kernel virtual address mapping for the
222
 *	pipe_buffer passed in @buf. If @atomic is set, an atomic map is provided
223 224 225 226 227
 *	and the caller has to be careful not to fault before calling
 *	the unmap function.
 *
 *	Note that this function occupies KM_USER0 if @atomic != 0.
 */
228
void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
229
			   struct pipe_buffer *buf, int atomic)
L
Linus Torvalds 已提交
230
{
231 232 233 234 235
	if (atomic) {
		buf->flags |= PIPE_BUF_FLAG_ATOMIC;
		return kmap_atomic(buf->page, KM_USER0);
	}

L
Linus Torvalds 已提交
236 237
	return kmap(buf->page);
}
238
EXPORT_SYMBOL(generic_pipe_buf_map);
L
Linus Torvalds 已提交
239

240 241 242 243 244 245 246 247 248
/**
 * generic_pipe_buf_unmap - unmap a previously mapped pipe buffer
 * @pipe:	the pipe that the buffer belongs to
 * @buf:	the buffer that should be unmapped
 * @map_data:	the data that the mapping function returned
 *
 * Description:
 *	This function undoes the mapping that ->map() provided.
 */
249
void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
250
			    struct pipe_buffer *buf, void *map_data)
L
Linus Torvalds 已提交
251
{
252 253 254 255 256
	if (buf->flags & PIPE_BUF_FLAG_ATOMIC) {
		buf->flags &= ~PIPE_BUF_FLAG_ATOMIC;
		kunmap_atomic(map_data, KM_USER0);
	} else
		kunmap(buf->page);
L
Linus Torvalds 已提交
257
}
258
EXPORT_SYMBOL(generic_pipe_buf_unmap);
L
Linus Torvalds 已提交
259

260
/**
261
 * generic_pipe_buf_steal - attempt to take ownership of a &pipe_buffer
262 263 264 265
 * @pipe:	the pipe that the buffer belongs to
 * @buf:	the buffer to attempt to steal
 *
 * Description:
266
 *	This function attempts to steal the &struct page attached to
267 268
 *	@buf. If successful, this function returns 0 and returns with
 *	the page locked. The caller may then reuse the page for whatever
269
 *	he wishes; the typical use is insertion into a different file
270 271
 *	page cache.
 */
272 273
int generic_pipe_buf_steal(struct pipe_inode_info *pipe,
			   struct pipe_buffer *buf)
274
{
275 276
	struct page *page = buf->page;

277 278 279 280 281
	/*
	 * A reference of one is golden, that means that the owner of this
	 * page is the only one holding a reference to it. lock the page
	 * and return OK.
	 */
282 283 284 285 286 287
	if (page_count(page) == 1) {
		lock_page(page);
		return 0;
	}

	return 1;
288
}
289
EXPORT_SYMBOL(generic_pipe_buf_steal);
290

291
/**
292
 * generic_pipe_buf_get - get a reference to a &struct pipe_buffer
293 294 295 296 297 298 299 300 301
 * @pipe:	the pipe that the buffer belongs to
 * @buf:	the buffer to get a reference to
 *
 * Description:
 *	This function grabs an extra reference to @buf. It's used in
 *	in the tee() system call, when we duplicate the buffers in one
 *	pipe into another.
 */
void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
302 303 304
{
	page_cache_get(buf->page);
}
305
EXPORT_SYMBOL(generic_pipe_buf_get);
306

307 308
/**
 * generic_pipe_buf_confirm - verify contents of the pipe buffer
 * @info:	the pipe that the buffer belongs to
 * @buf:	the buffer to confirm
 *
 * Description:
 *	Nothing needs checking here, because the generic pipe code uses
 *	pages that are always good when inserted into the pipe. Always
 *	returns 0.
 */
int generic_pipe_buf_confirm(struct pipe_inode_info *info,
			     struct pipe_buffer *buf)
{
	/* Generic pipe pages are valid from the moment they are inserted. */
	return 0;
}
EXPORT_SYMBOL(generic_pipe_buf_confirm);
322

323 324 325 326 327 328 329 330 331 332 333 334 335
/**
 * generic_pipe_buf_release - put a reference to a &struct pipe_buffer
 * @pipe:	the pipe that the buffer belongs to
 * @buf:	the buffer to put a reference to
 *
 * Description:
 *	Drops one reference to the page attached to @buf.
 */
void generic_pipe_buf_release(struct pipe_inode_info *pipe,
			      struct pipe_buffer *buf)
{
	struct page *pg = buf->page;

	page_cache_release(pg);
}
EXPORT_SYMBOL(generic_pipe_buf_release);
337

338
static const struct pipe_buf_operations anon_pipe_buf_ops = {
L
Linus Torvalds 已提交
339
	.can_merge = 1,
340 341
	.map = generic_pipe_buf_map,
	.unmap = generic_pipe_buf_unmap,
342
	.confirm = generic_pipe_buf_confirm,
L
Linus Torvalds 已提交
343
	.release = anon_pipe_buf_release,
344
	.steal = generic_pipe_buf_steal,
345
	.get = generic_pipe_buf_get,
L
Linus Torvalds 已提交
346 347 348
};

static ssize_t
349 350
pipe_read(struct kiocb *iocb, const struct iovec *_iov,
	   unsigned long nr_segs, loff_t pos)
L
Linus Torvalds 已提交
351
{
352
	struct file *filp = iocb->ki_filp;
353
	struct inode *inode = filp->f_path.dentry->d_inode;
354
	struct pipe_inode_info *pipe;
L
Linus Torvalds 已提交
355 356 357 358 359 360 361 362 363 364 365 366
	int do_wakeup;
	ssize_t ret;
	struct iovec *iov = (struct iovec *)_iov;
	size_t total_len;

	total_len = iov_length(iov, nr_segs);
	/* Null read succeeds. */
	if (unlikely(total_len == 0))
		return 0;

	do_wakeup = 0;
	ret = 0;
367
	mutex_lock(&inode->i_mutex);
368
	pipe = inode->i_pipe;
L
Linus Torvalds 已提交
369
	for (;;) {
370
		int bufs = pipe->nrbufs;
L
Linus Torvalds 已提交
371
		if (bufs) {
372 373
			int curbuf = pipe->curbuf;
			struct pipe_buffer *buf = pipe->bufs + curbuf;
374
			const struct pipe_buf_operations *ops = buf->ops;
L
Linus Torvalds 已提交
375 376
			void *addr;
			size_t chars = buf->len;
377
			int error, atomic;
L
Linus Torvalds 已提交
378 379 380 381

			if (chars > total_len)
				chars = total_len;

382
			error = ops->confirm(pipe, buf);
383
			if (error) {
384
				if (!ret)
385
					ret = error;
386 387
				break;
			}
388

389 390 391 392 393
			atomic = !iov_fault_in_pages_write(iov, chars);
redo:
			addr = ops->map(pipe, buf, atomic);
			error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic);
			ops->unmap(pipe, buf, addr);
L
Linus Torvalds 已提交
394
			if (unlikely(error)) {
395 396 397 398 399 400 401
				/*
				 * Just retry with the slow path if we failed.
				 */
				if (atomic) {
					atomic = 0;
					goto redo;
				}
402
				if (!ret)
403
					ret = error;
L
Linus Torvalds 已提交
404 405 406 407 408 409 410
				break;
			}
			ret += chars;
			buf->offset += chars;
			buf->len -= chars;
			if (!buf->len) {
				buf->ops = NULL;
411
				ops->release(pipe, buf);
412
				curbuf = (curbuf + 1) & (pipe->buffers - 1);
413 414
				pipe->curbuf = curbuf;
				pipe->nrbufs = --bufs;
L
Linus Torvalds 已提交
415 416 417 418 419 420 421 422
				do_wakeup = 1;
			}
			total_len -= chars;
			if (!total_len)
				break;	/* common path: read succeeded */
		}
		if (bufs)	/* More to do? */
			continue;
423
		if (!pipe->writers)
L
Linus Torvalds 已提交
424
			break;
425
		if (!pipe->waiting_writers) {
L
Linus Torvalds 已提交
426 427 428 429 430 431 432 433 434 435 436 437 438
			/* syscall merging: Usually we must not sleep
			 * if O_NONBLOCK is set, or if we got some data.
			 * But if a writer sleeps in kernel space, then
			 * we can wait for that data without violating POSIX.
			 */
			if (ret)
				break;
			if (filp->f_flags & O_NONBLOCK) {
				ret = -EAGAIN;
				break;
			}
		}
		if (signal_pending(current)) {
439 440
			if (!ret)
				ret = -ERESTARTSYS;
L
Linus Torvalds 已提交
441 442 443
			break;
		}
		if (do_wakeup) {
444 445
			wake_up_interruptible_sync(&pipe->wait);
 			kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
L
Linus Torvalds 已提交
446
		}
447
		pipe_wait(pipe);
L
Linus Torvalds 已提交
448
	}
449
	mutex_unlock(&inode->i_mutex);
450 451

	/* Signal writers asynchronously that there is more room. */
L
Linus Torvalds 已提交
452
	if (do_wakeup) {
I
Ingo Molnar 已提交
453
		wake_up_interruptible_sync(&pipe->wait);
454
		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
L
Linus Torvalds 已提交
455 456 457 458 459 460 461
	}
	if (ret > 0)
		file_accessed(filp);
	return ret;
}

static ssize_t
462 463
pipe_write(struct kiocb *iocb, const struct iovec *_iov,
	    unsigned long nr_segs, loff_t ppos)
L
Linus Torvalds 已提交
464
{
465
	struct file *filp = iocb->ki_filp;
466
	struct inode *inode = filp->f_path.dentry->d_inode;
467
	struct pipe_inode_info *pipe;
L
Linus Torvalds 已提交
468 469 470 471 472 473 474 475 476 477 478 479 480
	ssize_t ret;
	int do_wakeup;
	struct iovec *iov = (struct iovec *)_iov;
	size_t total_len;
	ssize_t chars;

	total_len = iov_length(iov, nr_segs);
	/* Null write succeeds. */
	if (unlikely(total_len == 0))
		return 0;

	do_wakeup = 0;
	ret = 0;
481
	mutex_lock(&inode->i_mutex);
482
	pipe = inode->i_pipe;
L
Linus Torvalds 已提交
483

484
	if (!pipe->readers) {
L
Linus Torvalds 已提交
485 486 487 488 489 490 491
		send_sig(SIGPIPE, current, 0);
		ret = -EPIPE;
		goto out;
	}

	/* We try to merge small writes */
	chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
492
	if (pipe->nrbufs && chars != 0) {
493
		int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) &
494
							(pipe->buffers - 1);
495
		struct pipe_buffer *buf = pipe->bufs + lastbuf;
496
		const struct pipe_buf_operations *ops = buf->ops;
L
Linus Torvalds 已提交
497
		int offset = buf->offset + buf->len;
498

L
Linus Torvalds 已提交
499
		if (ops->can_merge && offset + chars <= PAGE_SIZE) {
500
			int error, atomic = 1;
501 502
			void *addr;

503
			error = ops->confirm(pipe, buf);
504
			if (error)
505
				goto out;
506

507 508 509
			iov_fault_in_pages_read(iov, chars);
redo1:
			addr = ops->map(pipe, buf, atomic);
510
			error = pipe_iov_copy_from_user(offset + addr, iov,
511 512
							chars, atomic);
			ops->unmap(pipe, buf, addr);
L
Linus Torvalds 已提交
513 514
			ret = error;
			do_wakeup = 1;
515 516 517 518 519
			if (error) {
				if (atomic) {
					atomic = 0;
					goto redo1;
				}
L
Linus Torvalds 已提交
520
				goto out;
521
			}
L
Linus Torvalds 已提交
522 523 524 525 526 527 528 529 530 531
			buf->len += chars;
			total_len -= chars;
			ret = chars;
			if (!total_len)
				goto out;
		}
	}

	for (;;) {
		int bufs;
532

533
		if (!pipe->readers) {
L
Linus Torvalds 已提交
534
			send_sig(SIGPIPE, current, 0);
535 536
			if (!ret)
				ret = -EPIPE;
L
Linus Torvalds 已提交
537 538
			break;
		}
539
		bufs = pipe->nrbufs;
540 541
		if (bufs < pipe->buffers) {
			int newbuf = (pipe->curbuf + bufs) & (pipe->buffers-1);
542 543
			struct pipe_buffer *buf = pipe->bufs + newbuf;
			struct page *page = pipe->tmp_page;
544 545
			char *src;
			int error, atomic = 1;
L
Linus Torvalds 已提交
546 547 548 549 550 551 552

			if (!page) {
				page = alloc_page(GFP_HIGHUSER);
				if (unlikely(!page)) {
					ret = ret ? : -ENOMEM;
					break;
				}
553
				pipe->tmp_page = page;
L
Linus Torvalds 已提交
554
			}
555
			/* Always wake up, even if the copy fails. Otherwise
L
Linus Torvalds 已提交
556 557 558 559 560 561 562 563 564
			 * we lock up (O_NONBLOCK-)readers that sleep due to
			 * syscall merging.
			 * FIXME! Is this really true?
			 */
			do_wakeup = 1;
			chars = PAGE_SIZE;
			if (chars > total_len)
				chars = total_len;

565 566 567 568 569 570 571 572 573 574 575 576 577 578
			iov_fault_in_pages_read(iov, chars);
redo2:
			if (atomic)
				src = kmap_atomic(page, KM_USER0);
			else
				src = kmap(page);

			error = pipe_iov_copy_from_user(src, iov, chars,
							atomic);
			if (atomic)
				kunmap_atomic(src, KM_USER0);
			else
				kunmap(page);

L
Linus Torvalds 已提交
579
			if (unlikely(error)) {
580 581 582 583
				if (atomic) {
					atomic = 0;
					goto redo2;
				}
584
				if (!ret)
585
					ret = error;
L
Linus Torvalds 已提交
586 587 588 589 590 591 592 593 594
				break;
			}
			ret += chars;

			/* Insert it into the buffer array */
			buf->page = page;
			buf->ops = &anon_pipe_buf_ops;
			buf->offset = 0;
			buf->len = chars;
595 596
			pipe->nrbufs = ++bufs;
			pipe->tmp_page = NULL;
L
Linus Torvalds 已提交
597 598 599 600 601

			total_len -= chars;
			if (!total_len)
				break;
		}
602
		if (bufs < pipe->buffers)
L
Linus Torvalds 已提交
603 604
			continue;
		if (filp->f_flags & O_NONBLOCK) {
605 606
			if (!ret)
				ret = -EAGAIN;
L
Linus Torvalds 已提交
607 608 609
			break;
		}
		if (signal_pending(current)) {
610 611
			if (!ret)
				ret = -ERESTARTSYS;
L
Linus Torvalds 已提交
612 613 614
			break;
		}
		if (do_wakeup) {
615 616
			wake_up_interruptible_sync(&pipe->wait);
			kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
L
Linus Torvalds 已提交
617 618
			do_wakeup = 0;
		}
619 620 621
		pipe->waiting_writers++;
		pipe_wait(pipe);
		pipe->waiting_writers--;
L
Linus Torvalds 已提交
622 623
	}
out:
624
	mutex_unlock(&inode->i_mutex);
L
Linus Torvalds 已提交
625
	if (do_wakeup) {
I
Ingo Molnar 已提交
626
		wake_up_interruptible_sync(&pipe->wait);
627
		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
L
Linus Torvalds 已提交
628 629
	}
	if (ret > 0)
630
		file_update_time(filp);
L
Linus Torvalds 已提交
631 632 633 634 635 636 637 638 639 640
	return ret;
}

/* ->read() for the write-only end of a pipe: always fails with -EBADF. */
static ssize_t
bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
{
	return -EBADF;
}

static ssize_t
641 642
bad_pipe_w(struct file *filp, const char __user *buf, size_t count,
	   loff_t *ppos)
L
Linus Torvalds 已提交
643 644 645 646
{
	return -EBADF;
}

647
static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
L
Linus Torvalds 已提交
648
{
649
	struct inode *inode = filp->f_path.dentry->d_inode;
650
	struct pipe_inode_info *pipe;
L
Linus Torvalds 已提交
651 652 653 654
	int count, buf, nrbufs;

	switch (cmd) {
		case FIONREAD:
655
			mutex_lock(&inode->i_mutex);
656
			pipe = inode->i_pipe;
L
Linus Torvalds 已提交
657
			count = 0;
658 659
			buf = pipe->curbuf;
			nrbufs = pipe->nrbufs;
L
Linus Torvalds 已提交
660
			while (--nrbufs >= 0) {
661
				count += pipe->bufs[buf].len;
662
				buf = (buf+1) & (pipe->buffers - 1);
L
Linus Torvalds 已提交
663
			}
664
			mutex_unlock(&inode->i_mutex);
665

L
Linus Torvalds 已提交
666 667 668 669 670 671 672 673 674 675 676
			return put_user(count, (int __user *)arg);
		default:
			return -EINVAL;
	}
}

/*
 * ->poll() for pipes and FIFOs. No kernel lock held - fine.
 *
 * Readable ends report POLLIN | POLLRDNORM while buffers are queued and
 * POLLHUP once there are no writers and f_version differs from w_counter.
 * Writable ends report POLLOUT | POLLWRNORM while a buffer slot is free and
 * POLLERR once there are no readers.
 */
static unsigned int
pipe_poll(struct file *filp, poll_table *wait)
{
	struct inode *inode = filp->f_path.dentry->d_inode;
	struct pipe_inode_info *pipe = inode->i_pipe;
	unsigned int mask = 0;
	int nrbufs;

	poll_wait(filp, &pipe->wait, wait);

	/* Reading only -- no need for acquiring the semaphore.  */
	nrbufs = pipe->nrbufs;

	if (filp->f_mode & FMODE_READ) {
		if (nrbufs > 0)
			mask |= POLLIN | POLLRDNORM;
		if (!pipe->writers && filp->f_version != pipe->w_counter)
			mask |= POLLHUP;
	}

	if (filp->f_mode & FMODE_WRITE) {
		if (nrbufs < pipe->buffers)
			mask |= POLLOUT | POLLWRNORM;
		/*
		 * Most Unices do not set POLLERR for FIFOs but on Linux they
		 * behave exactly like pipes for poll().
		 */
		if (!pipe->readers)
			mask |= POLLERR;
	}

	return mask;
}

static int
pipe_release(struct inode *inode, int decr, int decw)
{
708 709
	struct pipe_inode_info *pipe;

710
	mutex_lock(&inode->i_mutex);
711 712 713
	pipe = inode->i_pipe;
	pipe->readers -= decr;
	pipe->writers -= decw;
714

715
	if (!pipe->readers && !pipe->writers) {
L
Linus Torvalds 已提交
716 717
		free_pipe_info(inode);
	} else {
I
Ingo Molnar 已提交
718
		wake_up_interruptible_sync(&pipe->wait);
719 720
		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
L
Linus Torvalds 已提交
721
	}
722
	mutex_unlock(&inode->i_mutex);
L
Linus Torvalds 已提交
723 724 725 726 727 728 729

	return 0;
}

static int
pipe_read_fasync(int fd, struct file *filp, int on)
{
730
	struct inode *inode = filp->f_path.dentry->d_inode;
L
Linus Torvalds 已提交
731 732
	int retval;

733 734 735
	mutex_lock(&inode->i_mutex);
	retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_readers);
	mutex_unlock(&inode->i_mutex);
L
Linus Torvalds 已提交
736

737
	return retval;
L
Linus Torvalds 已提交
738 739 740 741 742 743
}


static int
pipe_write_fasync(int fd, struct file *filp, int on)
{
744
	struct inode *inode = filp->f_path.dentry->d_inode;
L
Linus Torvalds 已提交
745 746
	int retval;

747 748 749
	mutex_lock(&inode->i_mutex);
	retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_writers);
	mutex_unlock(&inode->i_mutex);
L
Linus Torvalds 已提交
750

751
	return retval;
L
Linus Torvalds 已提交
752 753 754 755 756 757
}


static int
pipe_rdwr_fasync(int fd, struct file *filp, int on)
{
758
	struct inode *inode = filp->f_path.dentry->d_inode;
759
	struct pipe_inode_info *pipe = inode->i_pipe;
L
Linus Torvalds 已提交
760 761
	int retval;

762
	mutex_lock(&inode->i_mutex);
763
	retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
764
	if (retval >= 0) {
765
		retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);
766 767 768
		if (retval < 0) /* this can happen only if on == T */
			fasync_helper(-1, filp, 0, &pipe->fasync_readers);
	}
769
	mutex_unlock(&inode->i_mutex);
770
	return retval;
L
Linus Torvalds 已提交
771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798
}


/* ->release() for the read end: drop one reader. */
static int
pipe_read_release(struct inode *inode, struct file *filp)
{
	const int readers = 1, writers = 0;

	return pipe_release(inode, readers, writers);
}

/* ->release() for the write end: drop one writer. */
static int
pipe_write_release(struct inode *inode, struct file *filp)
{
	const int readers = 0, writers = 1;

	return pipe_release(inode, readers, writers);
}

/*
 * ->release() for a read/write pipe file: drop a reader and/or a writer,
 * depending on the modes the file was opened with.
 */
static int
pipe_rdwr_release(struct inode *inode, struct file *filp)
{
	int decr = (filp->f_mode & FMODE_READ) ? 1 : 0;
	int decw = (filp->f_mode & FMODE_WRITE) ? 1 : 0;

	return pipe_release(inode, decr, decw);
}

static int
pipe_read_open(struct inode *inode, struct file *filp)
{
E
Earl Chew 已提交
799 800
	int ret = -ENOENT;

801
	mutex_lock(&inode->i_mutex);
E
Earl Chew 已提交
802 803 804 805 806 807

	if (inode->i_pipe) {
		ret = 0;
		inode->i_pipe->readers++;
	}

808
	mutex_unlock(&inode->i_mutex);
L
Linus Torvalds 已提交
809

E
Earl Chew 已提交
810
	return ret;
L
Linus Torvalds 已提交
811 812 813 814 815
}

static int
pipe_write_open(struct inode *inode, struct file *filp)
{
E
Earl Chew 已提交
816 817
	int ret = -ENOENT;

818
	mutex_lock(&inode->i_mutex);
E
Earl Chew 已提交
819 820 821 822 823 824

	if (inode->i_pipe) {
		ret = 0;
		inode->i_pipe->writers++;
	}

825
	mutex_unlock(&inode->i_mutex);
L
Linus Torvalds 已提交
826

E
Earl Chew 已提交
827
	return ret;
L
Linus Torvalds 已提交
828 829 830 831 832
}

static int
pipe_rdwr_open(struct inode *inode, struct file *filp)
{
E
Earl Chew 已提交
833 834
	int ret = -ENOENT;

835
	mutex_lock(&inode->i_mutex);
E
Earl Chew 已提交
836 837 838 839 840 841 842 843 844

	if (inode->i_pipe) {
		ret = 0;
		if (filp->f_mode & FMODE_READ)
			inode->i_pipe->readers++;
		if (filp->f_mode & FMODE_WRITE)
			inode->i_pipe->writers++;
	}

845
	mutex_unlock(&inode->i_mutex);
L
Linus Torvalds 已提交
846

E
Earl Chew 已提交
847
	return ret;
L
Linus Torvalds 已提交
848 849 850 851 852
}

/*
 * The file_operations structs are not static because they
 * are also used in linux/fs/fifo.c to do operations on FIFOs.
853 854
 *
 * Pipes reuse fifos' file_operations structs.
L
Linus Torvalds 已提交
855
 */
856
const struct file_operations read_pipefifo_fops = {
L
Linus Torvalds 已提交
857
	.llseek		= no_llseek,
858 859
	.read		= do_sync_read,
	.aio_read	= pipe_read,
L
Linus Torvalds 已提交
860 861
	.write		= bad_pipe_w,
	.poll		= pipe_poll,
862
	.unlocked_ioctl	= pipe_ioctl,
L
Linus Torvalds 已提交
863 864 865 866 867
	.open		= pipe_read_open,
	.release	= pipe_read_release,
	.fasync		= pipe_read_fasync,
};

868
const struct file_operations write_pipefifo_fops = {
L
Linus Torvalds 已提交
869 870
	.llseek		= no_llseek,
	.read		= bad_pipe_r,
871 872
	.write		= do_sync_write,
	.aio_write	= pipe_write,
L
Linus Torvalds 已提交
873
	.poll		= pipe_poll,
874
	.unlocked_ioctl	= pipe_ioctl,
L
Linus Torvalds 已提交
875 876 877 878 879
	.open		= pipe_write_open,
	.release	= pipe_write_release,
	.fasync		= pipe_write_fasync,
};

880
const struct file_operations rdwr_pipefifo_fops = {
L
Linus Torvalds 已提交
881
	.llseek		= no_llseek,
882 883 884 885
	.read		= do_sync_read,
	.aio_read	= pipe_read,
	.write		= do_sync_write,
	.aio_write	= pipe_write,
L
Linus Torvalds 已提交
886
	.poll		= pipe_poll,
887
	.unlocked_ioctl	= pipe_ioctl,
L
Linus Torvalds 已提交
888 889 890 891 892
	.open		= pipe_rdwr_open,
	.release	= pipe_rdwr_release,
	.fasync		= pipe_rdwr_fasync,
};

893 894
struct pipe_inode_info * alloc_pipe_info(struct inode *inode)
{
895
	struct pipe_inode_info *pipe;
896

897 898
	pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
	if (pipe) {
899 900 901 902 903 904 905 906 907
		pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * PIPE_DEF_BUFFERS, GFP_KERNEL);
		if (pipe->bufs) {
			init_waitqueue_head(&pipe->wait);
			pipe->r_counter = pipe->w_counter = 1;
			pipe->inode = inode;
			pipe->buffers = PIPE_DEF_BUFFERS;
			return pipe;
		}
		kfree(pipe);
908 909
	}

910
	return NULL;
911 912
}

913
void __free_pipe_info(struct pipe_inode_info *pipe)
L
Linus Torvalds 已提交
914 915 916
{
	int i;

917
	for (i = 0; i < pipe->buffers; i++) {
918
		struct pipe_buffer *buf = pipe->bufs + i;
L
Linus Torvalds 已提交
919
		if (buf->ops)
920
			buf->ops->release(pipe, buf);
L
Linus Torvalds 已提交
921
	}
922 923
	if (pipe->tmp_page)
		__free_page(pipe->tmp_page);
924
	kfree(pipe->bufs);
925
	kfree(pipe);
L
Linus Torvalds 已提交
926 927
}

928 929 930 931 932 933
/* Free the pipe info attached to @inode and clear inode->i_pipe. */
void free_pipe_info(struct inode *inode)
{
	struct pipe_inode_info *pipe = inode->i_pipe;

	__free_pipe_info(pipe);
	inode->i_pipe = NULL;
}

934
/* Mount whose superblock (mnt_sb) supplies the inodes and dentries of pipes. */
static struct vfsmount *pipe_mnt __read_mostly;
935

936 937 938 939 940 941 942 943 944
/*
 * pipefs_dname() is called from d_path(). It formats the name of a pipe
 * dentry as "pipe:[<inode number>]".
 */
static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
{
	unsigned long ino = dentry->d_inode->i_ino;

	return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]", ino);
}

A
Al Viro 已提交
945
static const struct dentry_operations pipefs_dentry_operations = {
946
	.d_dname	= pipefs_dname,
L
Linus Torvalds 已提交
947 948 949 950 951
};

static struct inode * get_pipe_inode(void)
{
	struct inode *inode = new_inode(pipe_mnt->mnt_sb);
952
	struct pipe_inode_info *pipe;
L
Linus Torvalds 已提交
953 954 955 956

	if (!inode)
		goto fail_inode;

957 958
	inode->i_ino = get_next_ino();

959 960
	pipe = alloc_pipe_info(inode);
	if (!pipe)
L
Linus Torvalds 已提交
961
		goto fail_iput;
962
	inode->i_pipe = pipe;
963

964
	pipe->readers = pipe->writers = 1;
965
	inode->i_fop = &rdwr_pipefifo_fops;
L
Linus Torvalds 已提交
966 967 968 969 970 971 972 973 974

	/*
	 * Mark the inode dirty from the very beginning,
	 * that way it will never be moved to the dirty
	 * list because "mark_inode_dirty()" will think
	 * that it already _is_ on the dirty list.
	 */
	inode->i_state = I_DIRTY;
	inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
975 976
	inode->i_uid = current_fsuid();
	inode->i_gid = current_fsgid();
L
Linus Torvalds 已提交
977
	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
978

L
Linus Torvalds 已提交
979 980 981 982
	return inode;

fail_iput:
	iput(inode);
983

L
Linus Torvalds 已提交
984 985 986 987
fail_inode:
	return NULL;
}

988
struct file *create_write_pipe(int flags)
L
Linus Torvalds 已提交
989
{
A
Andi Kleen 已提交
990 991 992
	int err;
	struct inode *inode;
	struct file *f;
993
	struct path path;
994
	struct qstr name = { .name = "" };
L
Linus Torvalds 已提交
995

A
Andi Kleen 已提交
996
	err = -ENFILE;
L
Linus Torvalds 已提交
997 998
	inode = get_pipe_inode();
	if (!inode)
D
Dave Hansen 已提交
999
		goto err;
L
Linus Torvalds 已提交
1000

A
Andi Kleen 已提交
1001
	err = -ENOMEM;
1002
	path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &name);
1003
	if (!path.dentry)
A
Andi Kleen 已提交
1004
		goto err_inode;
1005
	path.mnt = mntget(pipe_mnt);
1006

1007
	d_set_d_op(path.dentry, &pipefs_dentry_operations);
1008
	d_instantiate(path.dentry, inode);
D
Dave Hansen 已提交
1009 1010

	err = -ENFILE;
1011
	f = alloc_file(&path, FMODE_WRITE, &write_pipefifo_fops);
D
Dave Hansen 已提交
1012 1013
	if (!f)
		goto err_dentry;
A
Andi Kleen 已提交
1014
	f->f_mapping = inode->i_mapping;
1015

1016
	f->f_flags = O_WRONLY | (flags & O_NONBLOCK);
A
Andi Kleen 已提交
1017 1018 1019
	f->f_version = 0;

	return f;
L
Linus Torvalds 已提交
1020

D
Dave Hansen 已提交
1021
 err_dentry:
1022
	free_pipe_info(inode);
1023
	path_put(&path);
1024 1025
	return ERR_PTR(err);

A
Andi Kleen 已提交
1026
 err_inode:
L
Linus Torvalds 已提交
1027 1028
	free_pipe_info(inode);
	iput(inode);
D
Dave Hansen 已提交
1029
 err:
A
Andi Kleen 已提交
1030 1031 1032 1033 1034
	return ERR_PTR(err);
}

void free_write_pipe(struct file *f)
{
1035
	free_pipe_info(f->f_dentry->d_inode);
1036
	path_put(&f->f_path);
A
Andi Kleen 已提交
1037 1038 1039
	put_filp(f);
}

1040
struct file *create_read_pipe(struct file *wrf, int flags)
A
Andi Kleen 已提交
1041
{
1042 1043 1044
	/* Grab pipe from the writer */
	struct file *f = alloc_file(&wrf->f_path, FMODE_READ,
				    &read_pipefifo_fops);
A
Andi Kleen 已提交
1045 1046 1047
	if (!f)
		return ERR_PTR(-ENFILE);

1048
	path_get(&wrf->f_path);
1049
	f->f_flags = O_RDONLY | (flags & O_NONBLOCK);
A
Andi Kleen 已提交
1050 1051 1052 1053

	return f;
}

U
Ulrich Drepper 已提交
1054
int do_pipe_flags(int *fd, int flags)
A
Andi Kleen 已提交
1055 1056 1057 1058 1059
{
	struct file *fw, *fr;
	int error;
	int fdw, fdr;

1060
	if (flags & ~(O_CLOEXEC | O_NONBLOCK))
U
Ulrich Drepper 已提交
1061 1062
		return -EINVAL;

1063
	fw = create_write_pipe(flags);
A
Andi Kleen 已提交
1064 1065
	if (IS_ERR(fw))
		return PTR_ERR(fw);
1066
	fr = create_read_pipe(fw, flags);
A
Andi Kleen 已提交
1067 1068 1069 1070
	error = PTR_ERR(fr);
	if (IS_ERR(fr))
		goto err_write_pipe;

U
Ulrich Drepper 已提交
1071
	error = get_unused_fd_flags(flags);
A
Andi Kleen 已提交
1072 1073 1074 1075
	if (error < 0)
		goto err_read_pipe;
	fdr = error;

U
Ulrich Drepper 已提交
1076
	error = get_unused_fd_flags(flags);
A
Andi Kleen 已提交
1077 1078 1079 1080
	if (error < 0)
		goto err_fdr;
	fdw = error;

A
Al Viro 已提交
1081
	audit_fd_pair(fdr, fdw);
A
Andi Kleen 已提交
1082 1083 1084 1085 1086 1087 1088 1089 1090 1091
	fd_install(fdr, fr);
	fd_install(fdw, fw);
	fd[0] = fdr;
	fd[1] = fdw;

	return 0;

 err_fdr:
	put_unused_fd(fdr);
 err_read_pipe:
1092
	path_put(&fr->f_path);
A
Andi Kleen 已提交
1093 1094 1095 1096
	put_filp(fr);
 err_write_pipe:
	free_write_pipe(fw);
	return error;
L
Linus Torvalds 已提交
1097 1098
}

1099 1100 1101 1102
/*
 * sys_pipe() is the normal C calling standard for creating
 * a pipe. It's not the way Unix traditionally does this, though.
 */
1103
SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
1104 1105 1106 1107
{
	int fd[2];
	int error;

U
Ulrich Drepper 已提交
1108
	error = do_pipe_flags(fd, flags);
1109
	if (!error) {
1110 1111 1112
		if (copy_to_user(fildes, fd, sizeof(fd))) {
			sys_close(fd[0]);
			sys_close(fd[1]);
1113
			error = -EFAULT;
1114
		}
1115 1116 1117 1118
	}
	return error;
}

1119
/* pipe() is pipe2() with no flags. */
SYSCALL_DEFINE1(pipe, int __user *, fildes)
{
	const int no_flags = 0;

	return sys_pipe2(fildes, no_flags);
}

1124 1125 1126 1127
/*
 * Allocate a new array of pipe buffers and copy the info over. Returns the
 * pipe size if successful, or return -ERROR on error.
 */
1128
static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages)
1129 1130 1131 1132 1133 1134 1135 1136 1137
{
	struct pipe_buffer *bufs;

	/*
	 * We can shrink the pipe, if arg >= pipe->nrbufs. Since we don't
	 * expect a lot of shrink+grow operations, just free and allocate
	 * again like we would do for growing. If the pipe currently
	 * contains more buffers than arg, then return busy.
	 */
1138
	if (nr_pages < pipe->nrbufs)
1139 1140
		return -EBUSY;

1141
	bufs = kcalloc(nr_pages, sizeof(struct pipe_buffer), GFP_KERNEL);
1142 1143 1144 1145 1146 1147 1148 1149
	if (unlikely(!bufs))
		return -ENOMEM;

	/*
	 * The pipe array wraps around, so just start the new one at zero
	 * and adjust the indexes.
	 */
	if (pipe->nrbufs) {
M
Miklos Szeredi 已提交
1150 1151
		unsigned int tail;
		unsigned int head;
1152

M
Miklos Szeredi 已提交
1153 1154 1155 1156 1157 1158 1159
		tail = pipe->curbuf + pipe->nrbufs;
		if (tail < pipe->buffers)
			tail = 0;
		else
			tail &= (pipe->buffers - 1);

		head = pipe->nrbufs - tail;
1160 1161 1162
		if (head)
			memcpy(bufs, pipe->bufs + pipe->curbuf, head * sizeof(struct pipe_buffer));
		if (tail)
M
Miklos Szeredi 已提交
1163
			memcpy(bufs + head, pipe->bufs, tail * sizeof(struct pipe_buffer));
1164 1165 1166 1167 1168
	}

	pipe->curbuf = 0;
	kfree(pipe->bufs);
	pipe->bufs = bufs;
1169 1170
	pipe->buffers = nr_pages;
	return nr_pages * PAGE_SIZE;
1171 1172
}

1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201
/*
 * Round a requested pipe size in bytes up to a power-of-2 number of pages,
 * because the pipe array currently relies on holding a power-of-2 number
 * of pages.
 *
 * Requests smaller than one page (including 0) yield PAGE_SIZE. This also
 * keeps 0 away from roundup_pow_of_two(), whose result is undefined for 0.
 *
 * Returns the rounded size in bytes, or 0 if the rounded size cannot be
 * represented in an unsigned int; callers must treat 0 as "invalid".
 */
static inline unsigned int round_pipe_size(unsigned int size)
{
	unsigned long nr_pages;

	if (size < PAGE_SIZE)
		return PAGE_SIZE;

	/*
	 * Count pages without computing size + PAGE_SIZE - 1, which can
	 * wrap to a small value when unsigned long is 32 bits wide.
	 */
	nr_pages = size >> PAGE_SHIFT;
	if (size & (PAGE_SIZE - 1))
		nr_pages++;

	nr_pages = roundup_pow_of_two(nr_pages);

	/* Refuse a result that would be truncated by the return type. */
	if (nr_pages > (~0U >> PAGE_SHIFT))
		return 0;

	return nr_pages << PAGE_SHIFT;
}

/*
 * sysctl handler: let proc_dointvec_minmax() process the request, then,
 * after a write that did not fail, re-round pipe_max_size with
 * round_pipe_size().
 *
 * This should work even if CONFIG_PROC_FS isn't set, as
 * proc_dointvec_minmax will return an error.
 *
 * Returns whatever proc_dointvec_minmax() returned.
 */
int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf,
		 size_t *lenp, loff_t *ppos)
{
	int rc = proc_dointvec_minmax(table, write, buf, lenp, ppos);

	if (write && rc >= 0)
		pipe_max_size = round_pipe_size(pipe_max_size);

	return rc;
}

1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213
/*
 * Return the pipe_inode_info behind @file, or NULL if the file's inode is
 * not a FIFO.
 *
 * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
 * location, so a non-NULL ->i_pipe alone does not prove this is a pipe;
 * the inode mode has to be checked first.
 */
struct pipe_inode_info *get_pipe_info(struct file *file)
{
	struct inode *inode = file->f_path.dentry->d_inode;

	if (!S_ISFIFO(inode->i_mode))
		return NULL;

	return inode->i_pipe;
}

1214 1215 1216 1217 1218
long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct pipe_inode_info *pipe;
	long ret;

1219
	pipe = get_pipe_info(file);
1220 1221 1222 1223 1224 1225
	if (!pipe)
		return -EBADF;

	mutex_lock(&pipe->inode->i_mutex);

	switch (cmd) {
1226
	case F_SETPIPE_SZ: {
1227
		unsigned int size, nr_pages;
1228

1229 1230
		size = round_pipe_size(arg);
		nr_pages = size >> PAGE_SHIFT;
1231

1232 1233 1234 1235
		ret = -EINVAL;
		if (!nr_pages)
			goto out;

1236
		if (!capable(CAP_SYS_RESOURCE) && size > pipe_max_size) {
1237
			ret = -EPERM;
J
Julia Lawall 已提交
1238 1239
			goto out;
		}
1240
		ret = pipe_set_size(pipe, nr_pages);
1241
		break;
1242
		}
1243
	case F_GETPIPE_SZ:
1244
		ret = pipe->buffers * PAGE_SIZE;
1245 1246 1247 1248 1249 1250
		break;
	default:
		ret = -EINVAL;
		break;
	}

J
Julia Lawall 已提交
1251
out:
1252 1253 1254 1255
	mutex_unlock(&pipe->inode->i_mutex);
	return ret;
}

1256 1257 1258 1259
/*
 * Superblock operations for pipefs. Only ->destroy_inode is provided, and
 * it is set to free_inode_nonrcu.
 */
static const struct super_operations pipefs_ops = {
	.destroy_inode = free_inode_nonrcu,
};

L
Linus Torvalds 已提交
1260 1261 1262 1263 1264 1265
/*
 * pipefs should _never_ be mounted by userland - too much of security hassle,
 * no real gain from having the whole whorehouse mounted. So we don't need
 * any operations on the root directory. However, we need a non-trivial
 * d_name - pipe: will go nicely and kill the special-casing in procfs.
 */
A
Al Viro 已提交
1266 1267
static struct dentry *pipefs_mount(struct file_system_type *fs_type,
			 int flags, const char *dev_name, void *data)
L
Linus Torvalds 已提交
1268
{
1269
	return mount_pseudo(fs_type, "pipe:", &pipefs_ops, PIPEFS_MAGIC);
L
Linus Torvalds 已提交
1270 1271 1272 1273
}

static struct file_system_type pipe_fs_type = {
	.name		= "pipefs",
A
Al Viro 已提交
1274
	.mount		= pipefs_mount,
L
Linus Torvalds 已提交
1275 1276 1277 1278 1279 1280
	.kill_sb	= kill_anon_super,
};

/*
 * Register the pipefs filesystem type and create its kernel-internal
 * mount, stored in pipe_mnt.
 *
 * Returns 0 on success. If registration fails its error is returned; if
 * the mount fails the type is unregistered again and the mount error is
 * returned.
 */
static int __init init_pipe_fs(void)
{
	int err;

	err = register_filesystem(&pipe_fs_type);
	if (err)
		return err;

	pipe_mnt = kern_mount(&pipe_fs_type);
	if (!IS_ERR(pipe_mnt))
		return 0;

	err = PTR_ERR(pipe_mnt);
	unregister_filesystem(&pipe_fs_type);
	return err;
}
}

/*
 * Counterpart of init_pipe_fs(): unregister the pipefs filesystem type,
 * then release pipe_mnt with mntput().
 */
static void __exit exit_pipe_fs(void)
{
	unregister_filesystem(&pipe_fs_type);
	mntput(pipe_mnt);
}

/* Hook pipefs setup and teardown into the init/exit machinery. */
fs_initcall(init_pipe_fs);
module_exit(exit_pipe_fs);