/*
 *  fs/timerfd.c
 *
 *  Copyright (C) 2007  Davide Libenzi <davidel@xmailserver.org>
 *
 *
 *  Thanks to Thomas Gleixner for code reviews and useful comments.
 *
 */

#include <linux/file.h>
#include <linux/poll.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/time.h>
#include <linux/hrtimer.h>
#include <linux/anon_inodes.h>
#include <linux/timerfd.h>

struct timerfd_ctx {
	struct hrtimer tmr;
	ktime_t tintv;
	wait_queue_head_t wqh;
D
Davide Libenzi 已提交
28
	u64 ticks;
29
	int expired;
D
Davide Libenzi 已提交
30
	int clockid;
31 32 33 34 35
};

/*
 * This gets called when the timer event triggers. We set the "expired"
 * flag, but we do not re-arm the timer (in case it's necessary,
D
Davide Libenzi 已提交
36
 * tintv.tv64 != 0) until the timer is accessed.
37 38 39 40 41 42
 */
static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
{
	struct timerfd_ctx *ctx = container_of(htmr, struct timerfd_ctx, tmr);
	unsigned long flags;

D
Davide Libenzi 已提交
43
	spin_lock_irqsave(&ctx->wqh.lock, flags);
44
	ctx->expired = 1;
D
Davide Libenzi 已提交
45
	ctx->ticks++;
46
	wake_up_locked(&ctx->wqh);
D
Davide Libenzi 已提交
47
	spin_unlock_irqrestore(&ctx->wqh.lock, flags);
48 49 50 51

	return HRTIMER_NORESTART;
}

D
Davide Libenzi 已提交
52 53 54 55 56 57 58 59 60 61 62
static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
{
	ktime_t now, remaining;

	now = ctx->tmr.base->get_time();
	remaining = ktime_sub(ctx->tmr.expires, now);

	return remaining.tv64 < 0 ? ktime_set(0, 0): remaining;
}

static void timerfd_setup(struct timerfd_ctx *ctx, int flags,
63 64 65 66 67 68 69 70 71 72
			  const struct itimerspec *ktmr)
{
	enum hrtimer_mode htmode;
	ktime_t texp;

	htmode = (flags & TFD_TIMER_ABSTIME) ?
		HRTIMER_MODE_ABS: HRTIMER_MODE_REL;

	texp = timespec_to_ktime(ktmr->it_value);
	ctx->expired = 0;
D
Davide Libenzi 已提交
73
	ctx->ticks = 0;
74
	ctx->tintv = timespec_to_ktime(ktmr->it_interval);
D
Davide Libenzi 已提交
75
	hrtimer_init(&ctx->tmr, ctx->clockid, htmode);
76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98
	ctx->tmr.expires = texp;
	ctx->tmr.function = timerfd_tmrproc;
	if (texp.tv64 != 0)
		hrtimer_start(&ctx->tmr, texp, htmode);
}

/*
 * ->release() handler: cancels the timer, then frees the context that
 * was attached to the file as private_data. Always returns 0.
 */
static int timerfd_release(struct inode *inode, struct file *file)
{
	struct timerfd_ctx *tctx;

	tctx = file->private_data;
	/* The timer must be cancelled before its storage goes away. */
	hrtimer_cancel(&tctx->tmr);
	kfree(tctx);

	return 0;
}

/*
 * ->poll() handler: registers the caller on the context wait queue and
 * reports POLLIN when at least one expiration is waiting to be read.
 */
static unsigned int timerfd_poll(struct file *file, poll_table *wait)
{
	struct timerfd_ctx *ctx = file->private_data;
	unsigned int events = 0;
	unsigned long flags;

	poll_wait(file, &ctx->wqh, wait);

	/* ticks is updated by the timer callback under the same lock. */
	spin_lock_irqsave(&ctx->wqh.lock, flags);
	if (ctx->ticks)
		events |= POLLIN;
	spin_unlock_irqrestore(&ctx->wqh.lock, flags);

	return events;
}

/*
 * ->read() handler: hands the number of expirations since the last read
 * to user space as a u64 and resets the counter.
 *
 * Returns sizeof(u64) on success, or:
 *   -EINVAL       @count is smaller than sizeof(u64)
 *   -EAGAIN       no expiration is pending and the file is O_NONBLOCK
 *   -ERESTARTSYS  a signal arrived while waiting for an expiration
 *   -EFAULT       the value could not be stored at @buf
 */
static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
			    loff_t *ppos)
{
	struct timerfd_ctx *ctx = file->private_data;
	ssize_t res;
	u64 ticks = 0;
	DECLARE_WAITQUEUE(wait, current);

	if (count < sizeof(ticks))
		return -EINVAL;
	spin_lock_irq(&ctx->wqh.lock);
	res = -EAGAIN;
	if (!ctx->ticks && !(file->f_flags & O_NONBLOCK)) {
		/* Blocking read: sleep until a tick shows up or a signal. */
		__add_wait_queue(&ctx->wqh, &wait);
		for (res = 0;;) {
			set_current_state(TASK_INTERRUPTIBLE);
			if (ctx->ticks) {
				res = 0;
				break;
			}
			if (signal_pending(current)) {
				res = -ERESTARTSYS;
				break;
			}
			/* Drop the lock while sleeping, retake it to re-check. */
			spin_unlock_irq(&ctx->wqh.lock);
			schedule();
			spin_lock_irq(&ctx->wqh.lock);
		}
		__remove_wait_queue(&ctx->wqh, &wait);
		__set_current_state(TASK_RUNNING);
	}
	if (ctx->ticks) {
		ticks = ctx->ticks;
		if (ctx->expired && ctx->tintv.tv64) {
			/*
			 * If tintv.tv64 != 0, this is a periodic timer that
			 * needs to be re-armed. We avoid doing it in the timer
			 * callback to avoid DoS attacks specifying a very
			 * short timer period.
			 *
			 * The callback already counted one expiration, so
			 * one is subtracted from the number of periods the
			 * timer gets forwarded by.
			 */
			ticks += hrtimer_forward_now(&ctx->tmr,
						     ctx->tintv) - 1;
			hrtimer_restart(&ctx->tmr);
		}
		ctx->expired = 0;
		ctx->ticks = 0;
	}
	spin_unlock_irq(&ctx->wqh.lock);
	/* Copy to user space only after the spinlock has been released. */
	if (ticks)
		res = put_user(ticks, (u64 __user *) buf) ? -EFAULT : sizeof(ticks);
	return res;
}

/*
 * File operations of a timerfd file. timerfd_fget() also compares
 * file->f_op against this table to recognize timerfd descriptors.
 */
static const struct file_operations timerfd_fops = {
	.read		= timerfd_read,
	.poll		= timerfd_poll,
	.release	= timerfd_release,
};

D
Davide Libenzi 已提交
166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181
static struct file *timerfd_fget(int fd)
{
	struct file *file;

	file = fget(fd);
	if (!file)
		return ERR_PTR(-EBADF);
	if (file->f_op != &timerfd_fops) {
		fput(file);
		return ERR_PTR(-EINVAL);
	}

	return file;
}

asmlinkage long sys_timerfd_create(int clockid, int flags)
182
{
D
Davide Libenzi 已提交
183
	int error, ufd;
184 185 186 187
	struct timerfd_ctx *ctx;
	struct file *file;
	struct inode *inode;

D
Davide Libenzi 已提交
188 189
	if (flags)
		return -EINVAL;
190 191 192
	if (clockid != CLOCK_MONOTONIC &&
	    clockid != CLOCK_REALTIME)
		return -EINVAL;
D
Davide Libenzi 已提交
193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	init_waitqueue_head(&ctx->wqh);
	ctx->clockid = clockid;
	hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);

	error = anon_inode_getfd(&ufd, &inode, &file, "[timerfd]",
				 &timerfd_fops, ctx);
	if (error) {
		kfree(ctx);
		return error;
	}

	return ufd;
}

/*
 * timerfd_settime() system call: re-programs the timer behind @ufd and
 * optionally reports the setting that was in effect before.
 *
 * @ufd:   timerfd file descriptor
 * @flags: 0 or TFD_TIMER_ABSTIME; any other bit is rejected
 * @utmr:  new setting (initial expiration and interval)
 * @otmr:  if not NULL, receives the previous remaining time and interval
 *
 * Returns 0 on success, -EFAULT if @utmr cannot be read or @otmr cannot
 * be written, -EINVAL for unknown @flags, invalid timespec values or a
 * descriptor that is not a timerfd, and -EBADF for a descriptor that is
 * not open.
 */
asmlinkage long sys_timerfd_settime(int ufd, int flags,
				    const struct itimerspec __user *utmr,
				    struct itimerspec __user *otmr)
{
	struct file *file;
	struct timerfd_ctx *ctx;
	struct itimerspec ktmr, kotmr;

	if (copy_from_user(&ktmr, utmr, sizeof(ktmr)))
		return -EFAULT;

	/*
	 * TFD_TIMER_ABSTIME is the only flag that is interpreted; refuse
	 * anything else instead of silently ignoring it.
	 */
	if ((flags & ~TFD_TIMER_ABSTIME) ||
	    !timespec_valid(&ktmr.it_value) ||
	    !timespec_valid(&ktmr.it_interval))
		return -EINVAL;

	file = timerfd_fget(ufd);
	if (IS_ERR(file))
		return PTR_ERR(file);
	ctx = file->private_data;

	/*
	 * We need to stop the existing timer before reprogramming
	 * it to the new values. A negative return from
	 * hrtimer_try_to_cancel() makes us drop the lock and retry;
	 * the loop is left with ctx->wqh.lock held.
	 */
	for (;;) {
		spin_lock_irq(&ctx->wqh.lock);
		if (hrtimer_try_to_cancel(&ctx->tmr) >= 0)
			break;
		spin_unlock_irq(&ctx->wqh.lock);
		cpu_relax();
	}

	/*
	 * If the timer is expired and it's periodic, we need to advance it
	 * because the caller may want to know the previous expiration time.
	 * We do not update "ticks" and "expired" since the timer will be
	 * re-programmed again in the following timerfd_setup() call.
	 */
	if (ctx->expired && ctx->tintv.tv64)
		hrtimer_forward_now(&ctx->tmr, ctx->tintv);

	kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
	kotmr.it_interval = ktime_to_timespec(ctx->tintv);

	/*
	 * Re-program the timer to the new value ...
	 */
	timerfd_setup(ctx, flags, &ktmr);

	spin_unlock_irq(&ctx->wqh.lock);
	fput(file);
	/* The old setting is copied out after the spinlock is released. */
	if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr)))
		return -EFAULT;

	return 0;
}

/*
 * timerfd_gettime() system call: reports the time left until the next
 * expiration and the interval of the timer behind @ufd.
 *
 * Returns 0 on success, -EFAULT if @otmr cannot be written, -EBADF if
 * @ufd is not an open descriptor, or -EINVAL if it is not a timerfd.
 */
asmlinkage long sys_timerfd_gettime(int ufd, struct itimerspec __user *otmr)
{
	struct file *file;
	struct timerfd_ctx *ctx;
	struct itimerspec kotmr;

	file = timerfd_fget(ufd);
	if (IS_ERR(file))
		return PTR_ERR(file);
	ctx = file->private_data;

	spin_lock_irq(&ctx->wqh.lock);
	if (ctx->expired && ctx->tintv.tv64) {
		/*
		 * An expired periodic timer is re-armed here so that the
		 * remaining time refers to its next expiration. The
		 * callback already counted one tick, hence the "- 1".
		 */
		ctx->expired = 0;
		ctx->ticks +=
			hrtimer_forward_now(&ctx->tmr, ctx->tintv) - 1;
		hrtimer_restart(&ctx->tmr);
	}
	kotmr.it_value = ktime_to_timespec(timerfd_get_remaining(ctx));
	kotmr.it_interval = ktime_to_timespec(ctx->tintv);
	spin_unlock_irq(&ctx->wqh.lock);
	fput(file);

	return copy_to_user(otmr, &kotmr, sizeof(kotmr)) ? -EFAULT : 0;
}