timerfd.c 6.8 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
/*
 *  fs/timerfd.c
 *
 *  Copyright (C) 2007  Davide Libenzi <davidel@xmailserver.org>
 *
 *
 *  Thanks to Thomas Gleixner for code reviews and useful comments.
 *
 */

#include <linux/file.h>
#include <linux/poll.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/time.h>
#include <linux/hrtimer.h>
#include <linux/anon_inodes.h>
#include <linux/timerfd.h>
23
#include <linux/syscalls.h>
24 25 26 27 28

struct timerfd_ctx {
	struct hrtimer tmr;
	ktime_t tintv;
	wait_queue_head_t wqh;
D
Davide Libenzi 已提交
29
	u64 ticks;
30
	int expired;
D
Davide Libenzi 已提交
31
	int clockid;
32 33 34 35 36
};

/*
 * This gets called when the timer event triggers. We set the "expired"
 * flag, but we do not re-arm the timer (in case it's necessary,
D
Davide Libenzi 已提交
37
 * tintv.tv64 != 0) until the timer is accessed.
38 39 40 41 42 43
 */
static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
{
	struct timerfd_ctx *ctx = container_of(htmr, struct timerfd_ctx, tmr);
	unsigned long flags;

D
Davide Libenzi 已提交
44
	spin_lock_irqsave(&ctx->wqh.lock, flags);
45
	ctx->expired = 1;
D
Davide Libenzi 已提交
46
	ctx->ticks++;
47
	wake_up_locked(&ctx->wqh);
D
Davide Libenzi 已提交
48
	spin_unlock_irqrestore(&ctx->wqh.lock, flags);
49 50 51 52

	return HRTIMER_NORESTART;
}

D
Davide Libenzi 已提交
53 54
static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
{
55
	ktime_t remaining;
D
Davide Libenzi 已提交
56

57
	remaining = hrtimer_expires_remaining(&ctx->tmr);
D
Davide Libenzi 已提交
58 59 60 61
	return remaining.tv64 < 0 ? ktime_set(0, 0): remaining;
}

static void timerfd_setup(struct timerfd_ctx *ctx, int flags,
62 63 64 65 66 67 68 69 70 71
			  const struct itimerspec *ktmr)
{
	enum hrtimer_mode htmode;
	ktime_t texp;

	htmode = (flags & TFD_TIMER_ABSTIME) ?
		HRTIMER_MODE_ABS: HRTIMER_MODE_REL;

	texp = timespec_to_ktime(ktmr->it_value);
	ctx->expired = 0;
D
Davide Libenzi 已提交
72
	ctx->ticks = 0;
73
	ctx->tintv = timespec_to_ktime(ktmr->it_interval);
D
Davide Libenzi 已提交
74
	hrtimer_init(&ctx->tmr, ctx->clockid, htmode);
75
	hrtimer_set_expires(&ctx->tmr, texp);
76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
	ctx->tmr.function = timerfd_tmrproc;
	if (texp.tv64 != 0)
		hrtimer_start(&ctx->tmr, texp, htmode);
}

/*
 * ->release handler: cancel the timer, then free the context that was
 * hanging off file->private_data. Always returns 0.
 */
static int timerfd_release(struct inode *inode, struct file *file)
{
	struct timerfd_ctx *tctx = file->private_data;

	/* The timer must be cancelled before its containing object is freed. */
	hrtimer_cancel(&tctx->tmr);
	kfree(tctx);

	return 0;
}

/*
 * ->poll handler: register on the context's wait queue and report POLLIN
 * when at least one tick is pending. The tick counter is sampled under
 * wqh.lock.
 */
static unsigned int timerfd_poll(struct file *file, poll_table *wait)
{
	struct timerfd_ctx *tctx = file->private_data;
	unsigned long irqflags;
	unsigned int mask;

	poll_wait(file, &tctx->wqh, wait);

	spin_lock_irqsave(&tctx->wqh.lock, irqflags);
	mask = tctx->ticks ? POLLIN : 0;
	spin_unlock_irqrestore(&tctx->wqh.lock, irqflags);

	return mask;
}

/*
 * ->read handler: hand the number of expirations to user space as a u64.
 *
 * Returns sizeof(u64) on success, -EINVAL when the buffer is smaller than
 * a u64, -EAGAIN when nothing is pending on an O_NONBLOCK file,
 * -ERESTARTSYS when a signal interrupts the wait, and -EFAULT when the
 * value cannot be stored in the user buffer.
 */
static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
			    loff_t *ppos)
{
	struct timerfd_ctx *tctx = file->private_data;
	DECLARE_WAITQUEUE(waiter, current);
	ssize_t ret = -EAGAIN;
	u64 nticks = 0;

	if (count < sizeof(nticks))
		return -EINVAL;

	spin_lock_irq(&tctx->wqh.lock);

	if (!tctx->ticks && !(file->f_flags & O_NONBLOCK)) {
		/* Blocking read with nothing pending: sleep until a tick or signal. */
		__add_wait_queue(&tctx->wqh, &waiter);
		ret = 0;
		for (;;) {
			/* Set the task state before testing the condition. */
			set_current_state(TASK_INTERRUPTIBLE);
			if (tctx->ticks)
				break;
			if (signal_pending(current)) {
				ret = -ERESTARTSYS;
				break;
			}
			/* The lock is dropped only around the actual sleep. */
			spin_unlock_irq(&tctx->wqh.lock);
			schedule();
			spin_lock_irq(&tctx->wqh.lock);
		}
		__remove_wait_queue(&tctx->wqh, &waiter);
		__set_current_state(TASK_RUNNING);
	}

	if (tctx->ticks) {
		nticks = tctx->ticks;
		if (tctx->expired && tctx->tintv.tv64) {
			/*
			 * Periodic timer (tintv.tv64 != 0): re-arm it here
			 * rather than in the timer callback, so that a very
			 * short period cannot be used for a DoS attack. The
			 * "- 1" is there because the callback has already
			 * counted one tick.
			 */
			nticks += hrtimer_forward_now(&tctx->tmr,
						      tctx->tintv) - 1;
			hrtimer_restart(&tctx->tmr);
		}
		tctx->expired = 0;
		tctx->ticks = 0;
	}

	spin_unlock_irq(&tctx->wqh.lock);

	/* The user copy is done after the spinlock has been released. */
	if (!nticks)
		return ret;
	if (put_user(nticks, (u64 __user *) buf))
		return -EFAULT;

	return sizeof(nticks);
}

/*
 * File operations installed on every timerfd descriptor. timerfd_fget()
 * compares a file's f_op against the address of this table to decide
 * whether a descriptor is a timerfd.
 */
static const struct file_operations timerfd_fops = {
	.release	= timerfd_release,
	.poll		= timerfd_poll,
	.read		= timerfd_read,
};

D
Davide Libenzi 已提交
165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
static struct file *timerfd_fget(int fd)
{
	struct file *file;

	file = fget(fd);
	if (!file)
		return ERR_PTR(-EBADF);
	if (file->f_op != &timerfd_fops) {
		fput(file);
		return ERR_PTR(-EINVAL);
	}

	return file;
}

180
/*
 * timerfd_create(2): allocate a timer context for @clockid and wrap it in
 * an anonymous-inode file descriptor.
 *
 * Returns the new descriptor, -EINVAL for unknown @flags bits or a clock
 * other than CLOCK_MONOTONIC / CLOCK_REALTIME, -ENOMEM when the context
 * cannot be allocated, or the error reported by anon_inode_getfd().
 */
SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
{
	struct timerfd_ctx *tctx;
	int fd;

	/* Check the TFD_* constants for consistency.  */
	BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC);
	BUILD_BUG_ON(TFD_NONBLOCK != O_NONBLOCK);

	if (flags & ~(TFD_CLOEXEC | TFD_NONBLOCK))
		return -EINVAL;

	switch (clockid) {
	case CLOCK_MONOTONIC:
	case CLOCK_REALTIME:
		break;
	default:
		return -EINVAL;
	}

	tctx = kzalloc(sizeof(*tctx), GFP_KERNEL);
	if (tctx == NULL)
		return -ENOMEM;

	init_waitqueue_head(&tctx->wqh);
	tctx->clockid = clockid;
	hrtimer_init(&tctx->tmr, clockid, HRTIMER_MODE_ABS);

	fd = anon_inode_getfd("[timerfd]", &timerfd_fops, tctx,
			      flags & (O_CLOEXEC | O_NONBLOCK));
	if (fd >= 0)
		return fd;

	/* No file was created, so the context is still ours to free. */
	kfree(tctx);
	return fd;
}

211 212 213
/*
 * timerfd_settime(2): re-program the timer behind @ufd.
 *
 * @ufd:   timerfd descriptor
 * @flags: 0 or TFD_TIMER_ABSTIME; any other bit is rejected
 * @utmr:  new setting (user pointer)
 * @otmr:  if non-NULL, receives the setting that was in effect before
 *
 * Returns 0 on success, -EFAULT when a user copy fails, -EINVAL for
 * unknown flag bits or invalid timespec values, or the error from
 * timerfd_fget() (-EBADF / -EINVAL) when @ufd is not a usable timerfd.
 * The copy to @otmr happens after the timer has been re-programmed, so
 * an -EFAULT from that copy does not undo the new setting.
 */
SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
		const struct itimerspec __user *, utmr,
		struct itimerspec __user *, otmr)
{
	struct file *file;
	struct timerfd_ctx *ctx;
	struct itimerspec ktmr, kotmr;

	if (copy_from_user(&ktmr, utmr, sizeof(ktmr)))
		return -EFAULT;

	/*
	 * TFD_TIMER_ABSTIME is the only flag timerfd_setup() interprets;
	 * refuse anything else instead of silently ignoring it.
	 */
	if ((flags & ~TFD_TIMER_ABSTIME) ||
	    !timespec_valid(&ktmr.it_value) ||
	    !timespec_valid(&ktmr.it_interval))
		return -EINVAL;

	file = timerfd_fget(ufd);
	if (IS_ERR(file))
		return PTR_ERR(file);
	ctx = file->private_data;

	/*
	 * We need to stop the existing timer before reprogramming
	 * it to the new values. A negative return from
	 * hrtimer_try_to_cancel() means the cancel did not succeed, so
	 * drop the lock and retry. The loop exits with wqh.lock held.
	 */
	for (;;) {
		spin_lock_irq(&ctx->wqh.lock);
		if (hrtimer_try_to_cancel(&ctx->tmr) >= 0)
			break;
		spin_unlock_irq(&ctx->wqh.lock);
		cpu_relax();
	}

	/*
	 * If the timer is expired and it's periodic, we need to advance it
	 * because the caller may want to know the previous expiration time.
	 * We do not update "ticks" and "expired" since the timer will be
	 * re-programmed again in the following timerfd_setup() call.
	 */
	if (ctx->expired && ctx->tintv.tv64)
		hrtimer_forward_now(&ctx->tmr, ctx->tintv);

	/* Snapshot the old setting before it is overwritten. */
	kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
	kotmr.it_interval = ktime_to_timespec(ctx->tintv);

	/*
	 * Re-program the timer to the new value ...
	 */
	timerfd_setup(ctx, flags, &ktmr);

	spin_unlock_irq(&ctx->wqh.lock);
	fput(file);

	/* The user copy is done after the spinlock has been released. */
	if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr)))
		return -EFAULT;

	return 0;
}

asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr)
{
	struct file *file;
	struct timerfd_ctx *ctx;
	struct itimerspec kotmr;

	file = timerfd_fget(ufd);
	if (IS_ERR(file))
		return PTR_ERR(file);
	ctx = file->private_data;

	spin_lock_irq(&ctx->wqh.lock);
	if (ctx->expired && ctx->tintv.tv64) {
		ctx->expired = 0;
		ctx->ticks +=
			hrtimer_forward_now(&ctx->tmr, ctx->tintv) - 1;
		hrtimer_restart(&ctx->tmr);
	}
	kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
	kotmr.it_interval = ktime_to_timespec(ctx->tintv);
	spin_unlock_irq(&ctx->wqh.lock);
	fput(file);

	return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0;
292 293
}