/*
 *  fs/timerfd.c
 *
 *  Copyright (C) 2007  Davide Libenzi <davidel@xmailserver.org>
 *
 *
 *  Thanks to Thomas Gleixner for code reviews and useful comments.
 *
 */

#include <linux/file.h>
#include <linux/poll.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/time.h>
#include <linux/hrtimer.h>
#include <linux/anon_inodes.h>
#include <linux/timerfd.h>
#include <linux/syscalls.h>

struct timerfd_ctx {
	struct hrtimer tmr;
	ktime_t tintv;
	wait_queue_head_t wqh;
D
Davide Libenzi 已提交
29
	u64 ticks;
30
	int expired;
D
Davide Libenzi 已提交
31
	int clockid;
32 33 34 35 36
};

/*
 * This gets called when the timer event triggers. We set the "expired"
 * flag, but we do not re-arm the timer (in case it's necessary,
D
Davide Libenzi 已提交
37
 * tintv.tv64 != 0) until the timer is accessed.
38 39 40 41 42 43
 */
static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
{
	struct timerfd_ctx *ctx = container_of(htmr, struct timerfd_ctx, tmr);
	unsigned long flags;

D
Davide Libenzi 已提交
44
	spin_lock_irqsave(&ctx->wqh.lock, flags);
45
	ctx->expired = 1;
D
Davide Libenzi 已提交
46
	ctx->ticks++;
47
	wake_up_locked(&ctx->wqh);
D
Davide Libenzi 已提交
48
	spin_unlock_irqrestore(&ctx->wqh.lock, flags);
49 50 51 52

	return HRTIMER_NORESTART;
}

D
Davide Libenzi 已提交
53 54 55 56 57 58 59 60 61 62 63
static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
{
	ktime_t now, remaining;

	now = ctx->tmr.base->get_time();
	remaining = ktime_sub(ctx->tmr.expires, now);

	return remaining.tv64 < 0 ? ktime_set(0, 0): remaining;
}

static void timerfd_setup(struct timerfd_ctx *ctx, int flags,
64 65 66 67 68 69 70 71 72 73
			  const struct itimerspec *ktmr)
{
	enum hrtimer_mode htmode;
	ktime_t texp;

	htmode = (flags & TFD_TIMER_ABSTIME) ?
		HRTIMER_MODE_ABS: HRTIMER_MODE_REL;

	texp = timespec_to_ktime(ktmr->it_value);
	ctx->expired = 0;
D
Davide Libenzi 已提交
74
	ctx->ticks = 0;
75
	ctx->tintv = timespec_to_ktime(ktmr->it_interval);
D
Davide Libenzi 已提交
76
	hrtimer_init(&ctx->tmr, ctx->clockid, htmode);
77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99
	ctx->tmr.expires = texp;
	ctx->tmr.function = timerfd_tmrproc;
	if (texp.tv64 != 0)
		hrtimer_start(&ctx->tmr, texp, htmode);
}

/*
 * ->release() handler: tear down the context attached to the file.
 * Always returns 0.
 */
static int timerfd_release(struct inode *inode, struct file *file)
{
	struct timerfd_ctx *tfd = file->private_data;

	/* The timer is embedded in the context, so stop it before freeing. */
	hrtimer_cancel(&tfd->tmr);
	kfree(tfd);

	return 0;
}

/*
 * ->poll() handler: register the caller on the context's wait queue and
 * report POLLIN when there are unread ticks. No other event is reported.
 */
static unsigned int timerfd_poll(struct file *file, poll_table *wait)
{
	struct timerfd_ctx *ctx = file->private_data;
	unsigned int events = 0;
	unsigned long flags;

	poll_wait(file, &ctx->wqh, wait);

	/* "ticks" is updated by the timer callback under the same lock. */
	spin_lock_irqsave(&ctx->wqh.lock, flags);
	if (ctx->ticks)
		events |= POLLIN;
	spin_unlock_irqrestore(&ctx->wqh.lock, flags);

	return events;
}

/*
 * ->read() handler: hand the number of expirations since the last read
 * (or since the timer was programmed) to user space as a u64, and reset
 * the counter.
 *
 * Returns sizeof(u64) on success, or:
 *   -EINVAL       if @count is smaller than a u64,
 *   -EAGAIN       if no tick is pending and the file is O_NONBLOCK,
 *   -ERESTARTSYS  if a signal arrives while waiting for a tick,
 *   -EFAULT       if the value cannot be stored to @buf.
 */
static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
			    loff_t *ppos)
{
	struct timerfd_ctx *ctx = file->private_data;
	ssize_t res;
	u64 ticks = 0;
	DECLARE_WAITQUEUE(wait, current);

	if (count < sizeof(ticks))
		return -EINVAL;
	spin_lock_irq(&ctx->wqh.lock);
	res = -EAGAIN;
	if (!ctx->ticks && !(file->f_flags & O_NONBLOCK)) {
		/*
		 * Blocking read with nothing pending: sleep until the timer
		 * callback accounts a tick or a signal shows up. The lock is
		 * dropped only around schedule().
		 */
		__add_wait_queue(&ctx->wqh, &wait);
		for (res = 0;;) {
			set_current_state(TASK_INTERRUPTIBLE);
			if (ctx->ticks) {
				res = 0;
				break;
			}
			if (signal_pending(current)) {
				res = -ERESTARTSYS;
				break;
			}
			spin_unlock_irq(&ctx->wqh.lock);
			schedule();
			spin_lock_irq(&ctx->wqh.lock);
		}
		__remove_wait_queue(&ctx->wqh, &wait);
		__set_current_state(TASK_RUNNING);
	}
	if (ctx->ticks) {
		ticks = ctx->ticks;
		if (ctx->expired && ctx->tintv.tv64) {
			/*
			 * If tintv.tv64 != 0, this is a periodic timer that
			 * needs to be re-armed. We avoid doing it in the timer
			 * callback to avoid DoS attacks specifying a very
			 * short timer period.
			 *
			 * The callback already counted one expiry, so one is
			 * subtracted from what the forward helper returns.
			 */
			ticks += hrtimer_forward_now(&ctx->tmr,
						     ctx->tintv) - 1;
			hrtimer_restart(&ctx->tmr);
		}
		ctx->expired = 0;
		ctx->ticks = 0;
	}
	spin_unlock_irq(&ctx->wqh.lock);
	/* Copy to user space only after the spinlock has been released. */
	if (ticks)
		res = put_user(ticks, (u64 __user *) buf) ? -EFAULT: sizeof(ticks);
	return res;
}

/*
 * File operations of a timerfd descriptor. timerfd_fget() also compares
 * f_op against this table to recognize timerfd files.
 */
static const struct file_operations timerfd_fops = {
	.read		= timerfd_read,
	.poll		= timerfd_poll,
	.release	= timerfd_release,
};

D
Davide Libenzi 已提交
167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182
static struct file *timerfd_fget(int fd)
{
	struct file *file;

	file = fget(fd);
	if (!file)
		return ERR_PTR(-EBADF);
	if (file->f_op != &timerfd_fops) {
		fput(file);
		return ERR_PTR(-EINVAL);
	}

	return file;
}

asmlinkage long sys_timerfd_create(int clockid, int flags)
183
{
D
Davide Libenzi 已提交
184
	int error, ufd;
185 186 187 188
	struct timerfd_ctx *ctx;
	struct file *file;
	struct inode *inode;

D
Davide Libenzi 已提交
189 190
	if (flags)
		return -EINVAL;
191 192 193
	if (clockid != CLOCK_MONOTONIC &&
	    clockid != CLOCK_REALTIME)
		return -EINVAL;
D
Davide Libenzi 已提交
194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	init_waitqueue_head(&ctx->wqh);
	ctx->clockid = clockid;
	hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);

	error = anon_inode_getfd(&ufd, &inode, &file, "[timerfd]",
				 &timerfd_fops, ctx);
	if (error) {
		kfree(ctx);
		return error;
	}

	return ufd;
}

/*
 * timerfd_settime() system call: reprogram the timer behind @ufd and
 * optionally report its previous setting.
 *
 * @ufd:   timerfd descriptor
 * @flags: TFD_TIMER_ABSTIME for an absolute expiry, otherwise relative
 * @utmr:  new setting, copied in from user space
 * @otmr:  if non-NULL, receives the previous remaining time and interval
 *
 * Returns 0 on success, -EFAULT if @utmr cannot be read or @otmr cannot
 * be written, -EINVAL if either timespec in @utmr is invalid or @ufd is
 * not a timerfd, -EBADF if @ufd is not an open descriptor.
 *
 * The new setting is already in effect when the copy to @otmr fails.
 */
asmlinkage long sys_timerfd_settime(int ufd, int flags,
				    const struct itimerspec __user *utmr,
				    struct itimerspec __user *otmr)
{
	struct file *file;
	struct timerfd_ctx *ctx;
	struct itimerspec ktmr, kotmr;

	if (copy_from_user(&ktmr, utmr, sizeof(ktmr)))
		return -EFAULT;

	if (!timespec_valid(&ktmr.it_value) ||
	    !timespec_valid(&ktmr.it_interval))
		return -EINVAL;

	file = timerfd_fget(ufd);
	if (IS_ERR(file))
		return PTR_ERR(file);
	ctx = file->private_data;

	/*
	 * We need to stop the existing timer before reprogramming
	 * it to the new values.
	 *
	 * The loop is left with wqh.lock held. When the cancel attempt
	 * returns a negative value, the lock is dropped before retrying,
	 * since the timer callback takes the same lock.
	 */
	for (;;) {
		spin_lock_irq(&ctx->wqh.lock);
		if (hrtimer_try_to_cancel(&ctx->tmr) >= 0)
			break;
		spin_unlock_irq(&ctx->wqh.lock);
		cpu_relax();
	}

	/*
	 * If the timer is expired and it's periodic, we need to advance it
	 * because the caller may want to know the previous expiration time.
	 * We do not update "ticks" and "expired" since the timer will be
	 * re-programmed again in the following timerfd_setup() call.
	 */
	if (ctx->expired && ctx->tintv.tv64)
		hrtimer_forward_now(&ctx->tmr, ctx->tintv);

	kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
	kotmr.it_interval = ktime_to_timespec(ctx->tintv);

	/*
	 * Re-program the timer to the new value ...
	 */
	timerfd_setup(ctx, flags, &ktmr);

	spin_unlock_irq(&ctx->wqh.lock);
	fput(file);
	/* The old setting is copied out only after the spinlock is dropped. */
	if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr)))
		return -EFAULT;

	return 0;
}

/*
 * timerfd_gettime() system call: report the remaining time and the
 * interval of the timer behind @ufd.
 *
 * @ufd:  timerfd descriptor
 * @otmr: receives the current setting
 *
 * Returns 0 on success, -EFAULT if @otmr cannot be written, -EINVAL if
 * @ufd is not a timerfd, -EBADF if @ufd is not an open descriptor.
 */
asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr)
{
	struct file *file;
	struct timerfd_ctx *ctx;
	struct itimerspec kotmr;

	file = timerfd_fget(ufd);
	if (IS_ERR(file))
		return PTR_ERR(file);
	ctx = file->private_data;

	spin_lock_irq(&ctx->wqh.lock);
	if (ctx->expired && ctx->tintv.tv64) {
		/*
		 * An expired periodic timer is re-armed here so that the
		 * remaining time refers to its next expiry. Unlike the
		 * read path, "ticks" is kept (and topped up), not reset.
		 */
		ctx->expired = 0;
		ctx->ticks +=
			hrtimer_forward_now(&ctx->tmr, ctx->tintv) - 1;
		hrtimer_restart(&ctx->tmr);
	}
	kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
	kotmr.it_interval = ktime_to_timespec(ctx->tintv);
	spin_unlock_irq(&ctx->wqh.lock);
	fput(file);

	return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT: 0;
}