signalfd.c 9.6 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13
/*
 *  fs/signalfd.c
 *
 *  Copyright (C) 2003  Linus Torvalds
 *
 *  Mon Mar 5, 2007: Davide Libenzi <davidel@xmailserver.org>
 *      Changed ->read() to return a siginfo strcture instead of signal number.
 *      Fixed locking in ->poll().
 *      Added sighand-detach notification.
 *      Added fd re-use in sys_signalfd() syscall.
 *      Now using anonymous inode source.
 *      Thanks to Oleg Nesterov for useful code review and suggestions.
 *      More comments and suggestions from Arnd Bergmann.
14 15
 * Sat May 19, 2007: Davi E. M. Arnaut <davi@haxent.com.br>
 *      Retrieve multiple signals with one read() call
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135
 */

#include <linux/file.h>
#include <linux/poll.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/signal.h>
#include <linux/list.h>
#include <linux/anon_inodes.h>
#include <linux/signalfd.h>

struct signalfd_ctx {
	struct list_head lnk;
	wait_queue_head_t wqh;
	sigset_t sigmask;
	struct task_struct *tsk;
};

struct signalfd_lockctx {
	struct task_struct *tsk;
	unsigned long flags;
};

/*
 * Tries to acquire the sighand lock. We do not increment the sighand
 * use count, and we do not even pin the task struct, so we need to
 * do it inside an RCU read lock, and we must be prepared for the
 * ctx->tsk going to NULL (in signalfd_deliver()), and for the sighand
 * being detached. We return 0 if the sighand has been detached, or
 * 1 if we were able to pin the sighand lock.
 */
static int signalfd_lock(struct signalfd_ctx *ctx, struct signalfd_lockctx *lk)
{
	struct sighand_struct *sighand = NULL;

	rcu_read_lock();
	lk->tsk = rcu_dereference(ctx->tsk);
	if (likely(lk->tsk != NULL))
		sighand = lock_task_sighand(lk->tsk, &lk->flags);
	rcu_read_unlock();

	if (sighand && !ctx->tsk) {
		unlock_task_sighand(lk->tsk, &lk->flags);
		sighand = NULL;
	}

	return sighand != NULL;
}

static void signalfd_unlock(struct signalfd_lockctx *lk)
{
	unlock_task_sighand(lk->tsk, &lk->flags);
}

/*
 * This must be called with the sighand lock held.
 */
void signalfd_deliver(struct task_struct *tsk, int sig)
{
	struct sighand_struct *sighand = tsk->sighand;
	struct signalfd_ctx *ctx, *tmp;

	BUG_ON(!sig);
	list_for_each_entry_safe(ctx, tmp, &sighand->signalfd_list, lnk) {
		/*
		 * We use a negative signal value as a way to broadcast that the
		 * sighand has been orphaned, so that we can notify all the
		 * listeners about this. Remember the ctx->sigmask is inverted,
		 * so if the user is interested in a signal, that corresponding
		 * bit will be zero.
		 */
		if (sig < 0) {
			if (ctx->tsk == tsk) {
				ctx->tsk = NULL;
				list_del_init(&ctx->lnk);
				wake_up(&ctx->wqh);
			}
		} else {
			if (!sigismember(&ctx->sigmask, sig))
				wake_up(&ctx->wqh);
		}
	}
}

static void signalfd_cleanup(struct signalfd_ctx *ctx)
{
	struct signalfd_lockctx lk;

	/*
	 * This is tricky. If the sighand is gone, we do not need to remove
	 * context from the list, the list itself won't be there anymore.
	 */
	if (signalfd_lock(ctx, &lk)) {
		list_del(&ctx->lnk);
		signalfd_unlock(&lk);
	}
	kfree(ctx);
}

static int signalfd_release(struct inode *inode, struct file *file)
{
	signalfd_cleanup(file->private_data);
	return 0;
}

static unsigned int signalfd_poll(struct file *file, poll_table *wait)
{
	struct signalfd_ctx *ctx = file->private_data;
	unsigned int events = 0;
	struct signalfd_lockctx lk;

	poll_wait(file, &ctx->wqh, wait);

	/*
	 * Let the caller get a POLLIN in this case, ala socket recv() when
	 * the peer disconnects.
	 */
	if (signalfd_lock(ctx, &lk)) {
136 137
		if ((lk.tsk == current &&
		     next_signal(&lk.tsk->pending, &ctx->sigmask) > 0) ||
138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211
		    next_signal(&lk.tsk->signal->shared_pending,
				&ctx->sigmask) > 0)
			events |= POLLIN;
		signalfd_unlock(&lk);
	} else
		events |= POLLIN;

	return events;
}

/*
 * Copied from copy_siginfo_to_user() in kernel/signal.c
 */
static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
			     siginfo_t const *kinfo)
{
	long err;

	BUILD_BUG_ON(sizeof(struct signalfd_siginfo) != 128);

	/*
	 * Unused memebers should be zero ...
	 */
	err = __clear_user(uinfo, sizeof(*uinfo));

	/*
	 * If you change siginfo_t structure, please be sure
	 * this code is fixed accordingly.
	 */
	err |= __put_user(kinfo->si_signo, &uinfo->signo);
	err |= __put_user(kinfo->si_errno, &uinfo->err);
	err |= __put_user((short)kinfo->si_code, &uinfo->code);
	switch (kinfo->si_code & __SI_MASK) {
	case __SI_KILL:
		err |= __put_user(kinfo->si_pid, &uinfo->pid);
		err |= __put_user(kinfo->si_uid, &uinfo->uid);
		break;
	case __SI_TIMER:
		 err |= __put_user(kinfo->si_tid, &uinfo->tid);
		 err |= __put_user(kinfo->si_overrun, &uinfo->overrun);
		 err |= __put_user((long)kinfo->si_ptr, &uinfo->svptr);
		break;
	case __SI_POLL:
		err |= __put_user(kinfo->si_band, &uinfo->band);
		err |= __put_user(kinfo->si_fd, &uinfo->fd);
		break;
	case __SI_FAULT:
		err |= __put_user((long)kinfo->si_addr, &uinfo->addr);
#ifdef __ARCH_SI_TRAPNO
		err |= __put_user(kinfo->si_trapno, &uinfo->trapno);
#endif
		break;
	case __SI_CHLD:
		err |= __put_user(kinfo->si_pid, &uinfo->pid);
		err |= __put_user(kinfo->si_uid, &uinfo->uid);
		err |= __put_user(kinfo->si_status, &uinfo->status);
		err |= __put_user(kinfo->si_utime, &uinfo->utime);
		err |= __put_user(kinfo->si_stime, &uinfo->stime);
		break;
	case __SI_RT: /* This is not generated by the kernel as of now. */
	case __SI_MESGQ: /* But this is */
		err |= __put_user(kinfo->si_pid, &uinfo->pid);
		err |= __put_user(kinfo->si_uid, &uinfo->uid);
		err |= __put_user((long)kinfo->si_ptr, &uinfo->svptr);
		break;
	default: /* this is just in case for now ... */
		err |= __put_user(kinfo->si_pid, &uinfo->pid);
		err |= __put_user(kinfo->si_uid, &uinfo->uid);
		break;
	}

	return err ? -EFAULT: sizeof(*uinfo);
}

212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264
static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, siginfo_t *info,
				int nonblock)
{
	ssize_t ret;
	struct signalfd_lockctx lk;
	DECLARE_WAITQUEUE(wait, current);

	if (!signalfd_lock(ctx, &lk))
		return 0;

	ret = dequeue_signal(lk.tsk, &ctx->sigmask, info);
	switch (ret) {
	case 0:
		if (!nonblock)
			break;
		ret = -EAGAIN;
	default:
		signalfd_unlock(&lk);
		return ret;
	}

	add_wait_queue(&ctx->wqh, &wait);
	for (;;) {
		set_current_state(TASK_INTERRUPTIBLE);
		ret = dequeue_signal(lk.tsk, &ctx->sigmask, info);
		signalfd_unlock(&lk);
		if (ret != 0)
			break;
		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}
		schedule();
		ret = signalfd_lock(ctx, &lk);
		if (unlikely(!ret)) {
			/*
			 * Let the caller read zero byte, ala socket
			 * recv() when the peer disconnect. This test
			 * must be done before doing a dequeue_signal(),
			 * because if the sighand has been orphaned,
			 * the dequeue_signal() call is going to crash
			 * because ->sighand will be long gone.
			 */
			 break;
		}
	}

	remove_wait_queue(&ctx->wqh, &wait);
	__set_current_state(TASK_RUNNING);

	return ret;
}

265 266 267 268 269 270 271 272 273
/*
 * Returns either the size of a "struct signalfd_siginfo", or zero if the
 * sighand we are attached to, has been orphaned. The "count" parameter
 * must be at least the size of a "struct signalfd_siginfo".
 */
static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count,
			     loff_t *ppos)
{
	struct signalfd_ctx *ctx = file->private_data;
274 275 276
	struct signalfd_siginfo __user *siginfo;
	int nonblock = file->f_flags & O_NONBLOCK;
	ssize_t ret, total = 0;
277 278
	siginfo_t info;

279 280
	count /= sizeof(struct signalfd_siginfo);
	if (!count)
281 282
		return -EINVAL;

283 284 285 286 287 288 289 290 291 292 293 294 295 296 297
	siginfo = (struct signalfd_siginfo __user *) buf;

	do {
		ret = signalfd_dequeue(ctx, &info, nonblock);
		if (unlikely(ret <= 0))
			break;
		ret = signalfd_copyinfo(siginfo, &info);
		if (ret < 0)
			break;
		siginfo++;
		total += ret;
		nonblock = 1;
	} while (--count);

	return total ? total : ret;
298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322
}

static const struct file_operations signalfd_fops = {
	.release	= signalfd_release,
	.poll		= signalfd_poll,
	.read		= signalfd_read,
};

/*
 * Create a file descriptor that is associated with our signal
 * state. We can pass it around to others if we want to, but
 * it will always be _our_ signal state.
 */
asmlinkage long sys_signalfd(int ufd, sigset_t __user *user_mask, size_t sizemask)
{
	int error;
	sigset_t sigmask;
	struct signalfd_ctx *ctx;
	struct sighand_struct *sighand;
	struct file *file;
	struct inode *inode;
	struct signalfd_lockctx lk;

	if (sizemask != sizeof(sigset_t) ||
	    copy_from_user(&sigmask, user_mask, sizeof(sigmask)))
U
Ulrich Drepper 已提交
323
		return -EINVAL;
324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380
	sigdelsetmask(&sigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
	signotset(&sigmask);

	if (ufd == -1) {
		ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
		if (!ctx)
			return -ENOMEM;

		init_waitqueue_head(&ctx->wqh);
		ctx->sigmask = sigmask;
		ctx->tsk = current;

		sighand = current->sighand;
		/*
		 * Add this fd to the list of signal listeners.
		 */
		spin_lock_irq(&sighand->siglock);
		list_add_tail(&ctx->lnk, &sighand->signalfd_list);
		spin_unlock_irq(&sighand->siglock);

		/*
		 * When we call this, the initialization must be complete, since
		 * anon_inode_getfd() will install the fd.
		 */
		error = anon_inode_getfd(&ufd, &inode, &file, "[signalfd]",
					 &signalfd_fops, ctx);
		if (error)
			goto err_fdalloc;
	} else {
		file = fget(ufd);
		if (!file)
			return -EBADF;
		ctx = file->private_data;
		if (file->f_op != &signalfd_fops) {
			fput(file);
			return -EINVAL;
		}
		/*
		 * We need to be prepared of the fact that the sighand this fd
		 * is attached to, has been detched. In that case signalfd_lock()
		 * will return 0, and we'll just skip setting the new mask.
		 */
		if (signalfd_lock(ctx, &lk)) {
			ctx->sigmask = sigmask;
			signalfd_unlock(&lk);
		}
		wake_up(&ctx->wqh);
		fput(file);
	}

	return ufd;

err_fdalloc:
	signalfd_cleanup(ctx);
	return error;
}