/*
 * kvm eventfd support - use eventfd objects to signal various KVM events
 *
 * Copyright 2009 Novell.  All Rights Reserved.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Author:
 *	Gregory Haskins <ghaskins@novell.com>
 *
 * This file is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
 */
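/*
 * Overview: an irqfd binds an eventfd to a guest interrupt (GSI), so that
 * signaling the eventfd injects that interrupt; an ioeventfd works in the
 * opposite direction, signaling an eventfd when the guest writes to a
 * registered PIO/MMIO address.
 *
 * Illustrative userspace sketch (the GSI number is an arbitrary example):
 *
 *	int efd = eventfd(0, EFD_CLOEXEC);
 *	struct kvm_irqfd irqfd = { .fd = efd, .gsi = 24 };
 *	ioctl(vm_fd, KVM_IRQFD, &irqfd);
 *	...
 *	uint64_t one = 1;
 *	write(efd, &one, sizeof(one));	// injects GSI 24
 */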

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/kvm_irqfd.h>
#include <linux/workqueue.h>
#include <linux/syscalls.h>
#include <linux/wait.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/list.h>
#include <linux/eventfd.h>
#include <linux/kernel.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <linux/seqlock.h>
#include <linux/irqbypass.h>
#include <trace/events/kvm.h>

#include <kvm/iodev.h>

#ifdef CONFIG_HAVE_KVM_IRQFD

static struct workqueue_struct *irqfd_cleanup_wq;

static void
irqfd_inject(struct work_struct *work)
{
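	/*
	 * Without a resampler this models an edge-triggered interrupt:
	 * pulse the line (assert, then immediately de-assert).  With a
	 * resampler the line is level-triggered: it stays asserted until
	 * irqfd_resampler_ack() de-asserts it on guest EOI.
	 */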
	struct kvm_kernel_irqfd *irqfd =
		container_of(work, struct kvm_kernel_irqfd, inject);
	struct kvm *kvm = irqfd->kvm;

	if (!irqfd->resampler) {
		kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1,
				false);
		kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0,
				false);
	} else
		kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
			    irqfd->gsi, 1, false);
}

/*
 * Since resampler irqfds share an IRQ source ID, we de-assert once
 * then notify all of the resampler irqfds using this GSI.  We can't
 * do multiple de-asserts or we risk racing with incoming re-asserts.
 */
static void
irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
{
	struct kvm_kernel_irqfd_resampler *resampler;
	struct kvm *kvm;
	struct kvm_kernel_irqfd *irqfd;
	int idx;

	resampler = container_of(kian,
			struct kvm_kernel_irqfd_resampler, notifier);
	kvm = resampler->kvm;

	kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
		    resampler->notifier.gsi, 0, false);

	idx = srcu_read_lock(&kvm->irq_srcu);

	list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link)
		eventfd_signal(irqfd->resamplefd, 1);

	srcu_read_unlock(&kvm->irq_srcu, idx);
}

static void
irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd)
{
	struct kvm_kernel_irqfd_resampler *resampler = irqfd->resampler;
	struct kvm *kvm = resampler->kvm;

	mutex_lock(&kvm->irqfds.resampler_lock);

	list_del_rcu(&irqfd->resampler_link);
	synchronize_srcu(&kvm->irq_srcu);

	if (list_empty(&resampler->list)) {
		list_del(&resampler->link);
		kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier);
		kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
			    resampler->notifier.gsi, 0, false);
		kfree(resampler);
	}

	mutex_unlock(&kvm->irqfds.resampler_lock);
}

/*
 * Race-free decouple logic (ordering is critical)
 */
static void
irqfd_shutdown(struct work_struct *work)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(work, struct kvm_kernel_irqfd, shutdown);
	struct kvm *kvm = irqfd->kvm;
	u64 cnt;

	/* Make sure irqfd has been initialized in assign path. */
	synchronize_srcu(&kvm->irq_srcu);

	/*
	 * Synchronize with the wait-queue and unhook ourselves to prevent
	 * further events.
	 */
	eventfd_ctx_remove_wait_queue(irqfd->eventfd, &irqfd->wait, &cnt);

	/*
	 * We know no new events will be scheduled at this point, so block
	 * until all previously outstanding events have completed
	 */
	flush_work(&irqfd->inject);

	if (irqfd->resampler) {
		irqfd_resampler_shutdown(irqfd);
		eventfd_ctx_put(irqfd->resamplefd);
	}

	/*
	 * It is now safe to release the object's resources
	 */
#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
	irq_bypass_unregister_consumer(&irqfd->consumer);
#endif
	eventfd_ctx_put(irqfd->eventfd);
	kfree(irqfd);
}


/* assumes kvm->irqfds.lock is held */
static bool
irqfd_is_active(struct kvm_kernel_irqfd *irqfd)
{
	return !list_empty(&irqfd->list);
}

/*
 * Mark the irqfd as inactive and schedule it for removal
 *
 * assumes kvm->irqfds.lock is held
 */
static void
irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
{
	BUG_ON(!irqfd_is_active(irqfd));

	list_del_init(&irqfd->list);

	queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
}

int __attribute__((weak)) kvm_arch_set_irq_inatomic(
				struct kvm_kernel_irq_routing_entry *irq,
				struct kvm *kvm, int irq_source_id,
				int level,
				bool line_status)
{
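	/*
	 * Returning -EWOULDBLOCK tells irqfd_wakeup() that the injection
	 * cannot be completed in atomic context; it falls back to the
	 * process-context inject work instead.
	 */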
	return -EWOULDBLOCK;
}

/*
 * Called with wqh->lock held and interrupts disabled
 */
static int
irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(wait, struct kvm_kernel_irqfd, wait);
	__poll_t flags = key_to_poll(key);
	struct kvm_kernel_irq_routing_entry irq;
	struct kvm *kvm = irqfd->kvm;
	unsigned seq;
	int idx;

	if (flags & EPOLLIN) {
		idx = srcu_read_lock(&kvm->irq_srcu);
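		/*
		 * Snapshot the routing entry under the seqcount so a
		 * concurrent irqfd_update() can never hand us a torn copy.
		 */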
		do {
			seq = read_seqcount_begin(&irqfd->irq_entry_sc);
			irq = irqfd->irq_entry;
		} while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
		/* An event has been signaled, inject an interrupt */
		if (kvm_arch_set_irq_inatomic(&irq, kvm,
					      KVM_USERSPACE_IRQ_SOURCE_ID, 1,
					      false) == -EWOULDBLOCK)
			schedule_work(&irqfd->inject);
		srcu_read_unlock(&kvm->irq_srcu, idx);
	}

	if (flags & EPOLLHUP) {
		/* The eventfd is closing, detach from KVM */
		unsigned long flags;

		spin_lock_irqsave(&kvm->irqfds.lock, flags);

		/*
		 * We must check if someone deactivated the irqfd before
		 * we could acquire the irqfds.lock since the item is
		 * deactivated from the KVM side before it is unhooked from
		 * the wait-queue.  If it is already deactivated, we can
		 * simply return knowing the other side will cleanup for us.
		 * We cannot race against the irqfd going away since the
		 * other side is required to acquire wqh->lock, which we hold
		 */
		if (irqfd_is_active(irqfd))
			irqfd_deactivate(irqfd);

		spin_unlock_irqrestore(&kvm->irqfds.lock, flags);
	}

	return 0;
}

static void
irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
			poll_table *pt)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(pt, struct kvm_kernel_irqfd, pt);
	add_wait_queue(wqh, &irqfd->wait);
}

/* Must be called under irqfds.lock */
static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd)
{
	struct kvm_kernel_irq_routing_entry *e;
	struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
	int n_entries;

	n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi);

	write_seqcount_begin(&irqfd->irq_entry_sc);

	e = entries;
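	/*
	 * Only a GSI that resolves to exactly one routing entry can be
	 * cached for the fast, atomic injection path; anything else is
	 * marked invalid (type = 0) and takes the work-queue slow path.
	 */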
	if (n_entries == 1)
		irqfd->irq_entry = *e;
	else
		irqfd->irq_entry.type = 0;

	write_seqcount_end(&irqfd->irq_entry_sc);
}

#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
void __attribute__((weak)) kvm_arch_irq_bypass_stop(
				struct irq_bypass_consumer *cons)
{
}

void __attribute__((weak)) kvm_arch_irq_bypass_start(
				struct irq_bypass_consumer *cons)
{
}

int  __attribute__((weak)) kvm_arch_update_irqfd_routing(
				struct kvm *kvm, unsigned int host_irq,
				uint32_t guest_irq, bool set)
{
	return 0;
}
#endif

static int
kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
{
	struct kvm_kernel_irqfd *irqfd, *tmp;
	struct fd f;
	struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
	int ret;
	__poll_t events;
	int idx;

	if (!kvm_arch_intc_initialized(kvm))
		return -EAGAIN;

	irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL_ACCOUNT);
	if (!irqfd)
		return -ENOMEM;

	irqfd->kvm = kvm;
	irqfd->gsi = args->gsi;
	INIT_LIST_HEAD(&irqfd->list);
	INIT_WORK(&irqfd->inject, irqfd_inject);
	INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
	seqcount_init(&irqfd->irq_entry_sc);

	f = fdget(args->fd);
	if (!f.file) {
		ret = -EBADF;
		goto out;
	}

	eventfd = eventfd_ctx_fileget(f.file);
	if (IS_ERR(eventfd)) {
		ret = PTR_ERR(eventfd);
		goto fail;
	}

	irqfd->eventfd = eventfd;

	if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) {
		struct kvm_kernel_irqfd_resampler *resampler;

		resamplefd = eventfd_ctx_fdget(args->resamplefd);
		if (IS_ERR(resamplefd)) {
			ret = PTR_ERR(resamplefd);
			goto fail;
		}

		irqfd->resamplefd = resamplefd;
		INIT_LIST_HEAD(&irqfd->resampler_link);

		mutex_lock(&kvm->irqfds.resampler_lock);

		list_for_each_entry(resampler,
				    &kvm->irqfds.resampler_list, link) {
			if (resampler->notifier.gsi == irqfd->gsi) {
				irqfd->resampler = resampler;
				break;
			}
		}

		if (!irqfd->resampler) {
			resampler = kzalloc(sizeof(*resampler),
					    GFP_KERNEL_ACCOUNT);
			if (!resampler) {
				ret = -ENOMEM;
				mutex_unlock(&kvm->irqfds.resampler_lock);
				goto fail;
			}

			resampler->kvm = kvm;
			INIT_LIST_HEAD(&resampler->list);
			resampler->notifier.gsi = irqfd->gsi;
			resampler->notifier.irq_acked = irqfd_resampler_ack;
			INIT_LIST_HEAD(&resampler->link);

			list_add(&resampler->link, &kvm->irqfds.resampler_list);
			kvm_register_irq_ack_notifier(kvm,
						      &resampler->notifier);
			irqfd->resampler = resampler;
		}

		list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list);
		synchronize_srcu(&kvm->irq_srcu);

		mutex_unlock(&kvm->irqfds.resampler_lock);
	}

	/*
	 * Install our own custom wake-up handling so we are notified via
	 * a callback whenever someone signals the underlying eventfd
	 */
	init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
	init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);

	spin_lock_irq(&kvm->irqfds.lock);

	ret = 0;
	list_for_each_entry(tmp, &kvm->irqfds.items, list) {
		if (irqfd->eventfd != tmp->eventfd)
			continue;
		/* This fd is used for another irq already. */
		ret = -EBUSY;
		spin_unlock_irq(&kvm->irqfds.lock);
		goto fail;
	}

	idx = srcu_read_lock(&kvm->irq_srcu);
	irqfd_update(kvm, irqfd);

	list_add_tail(&irqfd->list, &kvm->irqfds.items);

	spin_unlock_irq(&kvm->irqfds.lock);

	/*
	 * Check if there was an event already pending on the eventfd
	 * before we registered, and trigger it as if we didn't miss it.
	 */
	events = vfs_poll(f.file, &irqfd->pt);

	if (events & EPOLLIN)
		schedule_work(&irqfd->inject);

#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
	if (kvm_arch_has_irq_bypass()) {
		irqfd->consumer.token = (void *)irqfd->eventfd;
		irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer;
		irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer;
		irqfd->consumer.stop = kvm_arch_irq_bypass_stop;
		irqfd->consumer.start = kvm_arch_irq_bypass_start;
		ret = irq_bypass_register_consumer(&irqfd->consumer);
		if (ret)
			pr_info("irq bypass consumer (token %p) registration fails: %d\n",
				irqfd->consumer.token, ret);
	}
#endif

	srcu_read_unlock(&kvm->irq_srcu, idx);

	/*
	 * do not drop the file until the irqfd is fully initialized, otherwise
	 * we might race against the EPOLLHUP
	 */
	fdput(f);
	return 0;

fail:
	if (irqfd->resampler)
		irqfd_resampler_shutdown(irqfd);

	if (resamplefd && !IS_ERR(resamplefd))
		eventfd_ctx_put(resamplefd);

	if (eventfd && !IS_ERR(eventfd))
		eventfd_ctx_put(eventfd);

	fdput(f);

out:
	kfree(irqfd);
	return ret;
}

bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
	struct kvm_irq_ack_notifier *kian;
	int gsi, idx;

	idx = srcu_read_lock(&kvm->irq_srcu);
	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
	if (gsi != -1)
		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
					 link)
			if (kian->gsi == gsi) {
				srcu_read_unlock(&kvm->irq_srcu, idx);
				return true;
			}

	srcu_read_unlock(&kvm->irq_srcu, idx);

	return false;
}
EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);

void kvm_notify_acked_gsi(struct kvm *kvm, int gsi)
{
	struct kvm_irq_ack_notifier *kian;

	hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
				 link)
		if (kian->gsi == gsi)
			kian->irq_acked(kian);
}

void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
	int gsi, idx;

	trace_kvm_ack_irq(irqchip, pin);

	idx = srcu_read_lock(&kvm->irq_srcu);
	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
	if (gsi != -1)
		kvm_notify_acked_gsi(kvm, gsi);
	srcu_read_unlock(&kvm->irq_srcu, idx);
}

void kvm_register_irq_ack_notifier(struct kvm *kvm,
				   struct kvm_irq_ack_notifier *kian)
{
	mutex_lock(&kvm->irq_lock);
	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
	mutex_unlock(&kvm->irq_lock);
	kvm_arch_post_irq_ack_notifier_list_update(kvm);
}

void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
				    struct kvm_irq_ack_notifier *kian)
{
	mutex_lock(&kvm->irq_lock);
	hlist_del_init_rcu(&kian->link);
	mutex_unlock(&kvm->irq_lock);
	synchronize_srcu(&kvm->irq_srcu);
	kvm_arch_post_irq_ack_notifier_list_update(kvm);
}
#endif

void
kvm_eventfd_init(struct kvm *kvm)
{
#ifdef CONFIG_HAVE_KVM_IRQFD
	spin_lock_init(&kvm->irqfds.lock);
	INIT_LIST_HEAD(&kvm->irqfds.items);
	INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
	mutex_init(&kvm->irqfds.resampler_lock);
#endif
	INIT_LIST_HEAD(&kvm->ioeventfds);
}

#ifdef CONFIG_HAVE_KVM_IRQFD
/*
 * shut down any irqfds that match fd+gsi
 */
static int
kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
{
	struct kvm_kernel_irqfd *irqfd, *tmp;
	struct eventfd_ctx *eventfd;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
		if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) {
			/*
			 * This clearing of irq_entry.type is needed for when
			 * another thread calls kvm_irq_routing_update before
			 * we flush workqueue below (we synchronize with
			 * kvm_irq_routing_update using irqfds.lock).
			 */
			write_seqcount_begin(&irqfd->irq_entry_sc);
			irqfd->irq_entry.type = 0;
			write_seqcount_end(&irqfd->irq_entry_sc);
			irqfd_deactivate(irqfd);
		}
	}

	spin_unlock_irq(&kvm->irqfds.lock);
	eventfd_ctx_put(eventfd);

	/*
	 * Block until we know all outstanding shutdown jobs have completed
	 * so that we guarantee there will not be any more interrupts on this
	 * gsi once this deassign function returns.
	 */
	flush_workqueue(irqfd_cleanup_wq);

	return 0;
}

int
kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
{
	if (args->flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_RESAMPLE))
		return -EINVAL;

	if (args->flags & KVM_IRQFD_FLAG_DEASSIGN)
		return kvm_irqfd_deassign(kvm, args);

	return kvm_irqfd_assign(kvm, args);
}

/*
 * This function is called as the kvm VM fd is being released. Shut down
 * all irqfds that still remain open.
 */
void
kvm_irqfd_release(struct kvm *kvm)
{
E
Eric Auger 已提交
	struct kvm_kernel_irqfd *irqfd, *tmp;

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list)
		irqfd_deactivate(irqfd);

	spin_unlock_irq(&kvm->irqfds.lock);

	/*
	 * Block until we know all outstanding shutdown jobs have completed
	 * since we do not take a kvm* reference.
	 */
	flush_workqueue(irqfd_cleanup_wq);

}

/*
 * Take note of a change in irq routing.
 * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards.
 */
void kvm_irq_routing_update(struct kvm *kvm)
{
	struct kvm_kernel_irqfd *irqfd;

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry(irqfd, &kvm->irqfds.items, list) {
		irqfd_update(kvm, irqfd);

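		/*
		 * If a bypass producer (e.g. a VFIO device's MSI) is
		 * attached, rewire it to the GSI's updated routing entry.
		 */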
#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
		if (irqfd->producer) {
			int ret = kvm_arch_update_irqfd_routing(
					irqfd->kvm, irqfd->producer->irq,
					irqfd->gsi, 1);
			WARN_ON(ret);
		}
#endif
	}

	spin_unlock_irq(&kvm->irqfds.lock);
}

/*
 * create a host-wide workqueue for issuing deferred shutdown requests
 * aggregated from all vm* instances. We need our own isolated
 * queue to ease flushing work items when a VM exits.
 */
int kvm_irqfd_init(void)
{
	irqfd_cleanup_wq = alloc_workqueue("kvm-irqfd-cleanup", 0, 0);
	if (!irqfd_cleanup_wq)
		return -ENOMEM;

	return 0;
}

void kvm_irqfd_exit(void)
{
	destroy_workqueue(irqfd_cleanup_wq);
}
#endif

/*
 * --------------------------------------------------------------------
 * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal.
 *
 * userspace can register a PIO/MMIO address with an eventfd for receiving
 * notification when the memory has been touched.
 * --------------------------------------------------------------------
 */
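/*
 * Illustrative userspace sketch (address and length are arbitrary
 * examples):
 *
 *	struct kvm_ioeventfd io = {
 *		.addr = 0xfe000000,
 *		.len  = 4,
 *		.fd   = efd,
 *	};
 *	ioctl(vm_fd, KVM_IOEVENTFD, &io);
 */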

struct _ioeventfd {
	struct list_head     list;
	u64                  addr;
	int                  length;
	struct eventfd_ctx  *eventfd;
	u64                  datamatch;
	struct kvm_io_device dev;
	u8                   bus_idx;
	bool                 wildcard;
};

static inline struct _ioeventfd *
to_ioeventfd(struct kvm_io_device *dev)
{
	return container_of(dev, struct _ioeventfd, dev);
}

static void
ioeventfd_release(struct _ioeventfd *p)
{
	eventfd_ctx_put(p->eventfd);
	list_del(&p->list);
	kfree(p);
}

static bool
ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
{
	u64 _val;

	if (addr != p->addr)
		/* address must be precise for a hit */
		return false;

	if (!p->length)
		/* length = 0 means only look at the address, so always a hit */
		return true;

	if (len != p->length)
		/* address-range must be precise for a hit */
		return false;

	if (p->wildcard)
		/* all else equal, wildcard is always a hit */
		return true;

	/* otherwise, we have to actually compare the data */

	BUG_ON(!IS_ALIGNED((unsigned long)val, len));

	switch (len) {
	case 1:
		_val = *(u8 *)val;
		break;
	case 2:
		_val = *(u16 *)val;
		break;
	case 4:
		_val = *(u32 *)val;
		break;
	case 8:
		_val = *(u64 *)val;
		break;
	default:
		return false;
	}

	return _val == p->datamatch;
}

/* MMIO/PIO writes trigger an event if the addr/val match */
static int
ioeventfd_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
		int len, const void *val)
{
	struct _ioeventfd *p = to_ioeventfd(this);

	if (!ioeventfd_in_range(p, addr, len, val))
		return -EOPNOTSUPP;

	eventfd_signal(p->eventfd, 1);
	return 0;
}

/*
 * This function is called as KVM is completely shutting down.  We do not
 * need to worry about locking; just nuke anything we have as quickly as
 * possible.
 */
static void
ioeventfd_destructor(struct kvm_io_device *this)
{
	struct _ioeventfd *p = to_ioeventfd(this);

	ioeventfd_release(p);
}

static const struct kvm_io_device_ops ioeventfd_ops = {
	.write      = ioeventfd_write,
	.destructor = ioeventfd_destructor,
};

/* assumes kvm->slots_lock held */
static bool
ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p)
{
	struct _ioeventfd *_p;

	list_for_each_entry(_p, &kvm->ioeventfds, list)
		if (_p->bus_idx == p->bus_idx &&
		    _p->addr == p->addr &&
		    (!_p->length || !p->length ||
		     (_p->length == p->length &&
		      (_p->wildcard || p->wildcard ||
		       _p->datamatch == p->datamatch))))
			return true;

	return false;
}

static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags)
{
	if (flags & KVM_IOEVENTFD_FLAG_PIO)
		return KVM_PIO_BUS;
	if (flags & KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY)
		return KVM_VIRTIO_CCW_NOTIFY_BUS;
	return KVM_MMIO_BUS;
}

static int kvm_assign_ioeventfd_idx(struct kvm *kvm,
				enum kvm_bus bus_idx,
				struct kvm_ioeventfd *args)
{
	struct eventfd_ctx *eventfd;
	struct _ioeventfd *p;
	int ret;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	p = kzalloc(sizeof(*p), GFP_KERNEL_ACCOUNT);
	if (!p) {
		ret = -ENOMEM;
		goto fail;
	}

	INIT_LIST_HEAD(&p->list);
	p->addr    = args->addr;
	p->bus_idx = bus_idx;
	p->length  = args->len;
	p->eventfd = eventfd;

	/* The datamatch feature is optional, otherwise this is a wildcard */
	if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)
		p->datamatch = args->datamatch;
	else
		p->wildcard = true;

	mutex_lock(&kvm->slots_lock);

	/* Verify that there isn't a match already */
	if (ioeventfd_check_collision(kvm, p)) {
		ret = -EEXIST;
		goto unlock_fail;
	}

	kvm_iodevice_init(&p->dev, &ioeventfd_ops);

	ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
				      &p->dev);
	if (ret < 0)
		goto unlock_fail;

	kvm_get_bus(kvm, bus_idx)->ioeventfd_count++;
	list_add_tail(&p->list, &kvm->ioeventfds);

	mutex_unlock(&kvm->slots_lock);

	return 0;

unlock_fail:
	mutex_unlock(&kvm->slots_lock);

fail:
	kfree(p);
	eventfd_ctx_put(eventfd);

	return ret;
}

static int
kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
			   struct kvm_ioeventfd *args)
{
	struct _ioeventfd        *p, *tmp;
	struct eventfd_ctx       *eventfd;
	struct kvm_io_bus	 *bus;
	int                       ret = -ENOENT;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	mutex_lock(&kvm->slots_lock);

	list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {
		bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);

		if (p->bus_idx != bus_idx ||
		    p->eventfd != eventfd  ||
		    p->addr != args->addr  ||
		    p->length != args->len ||
		    p->wildcard != wildcard)
			continue;

		if (!p->wildcard && p->datamatch != args->datamatch)
			continue;

		kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
		bus = kvm_get_bus(kvm, bus_idx);
		if (bus)
			bus->ioeventfd_count--;
		ioeventfd_release(p);
		ret = 0;
		break;
	}

	mutex_unlock(&kvm->slots_lock);

	eventfd_ctx_put(eventfd);

	return ret;
}

static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	enum kvm_bus bus_idx = ioeventfd_bus_from_flags(args->flags);
	int ret = kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);

	if (!args->len && bus_idx == KVM_MMIO_BUS)
		kvm_deassign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);

	return ret;
}

static int
kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	enum kvm_bus              bus_idx;
	int ret;

	bus_idx = ioeventfd_bus_from_flags(args->flags);
	/* must be natural-word sized, or 0 to ignore length */
	switch (args->len) {
	case 0:
	case 1:
	case 2:
	case 4:
	case 8:
		break;
	default:
		return -EINVAL;
	}

	/* check for range overflow */
	if (args->addr + args->len < args->addr)
		return -EINVAL;

	/* check for extra flags that we don't understand */
	if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
		return -EINVAL;

	/* ioeventfd with no length can't be combined with DATAMATCH */
	if (!args->len && (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH))
		return -EINVAL;

	ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args);
	if (ret)
		goto fail;

	/* When length is ignored, MMIO is also put on a separate bus, for
	 * faster lookups.
	 */
	if (!args->len && bus_idx == KVM_MMIO_BUS) {
		ret = kvm_assign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);
		if (ret < 0)
			goto fast_fail;
	}

	return 0;

fast_fail:
	kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);
fail:
	return ret;
}

int
kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	if (args->flags & KVM_IOEVENTFD_FLAG_DEASSIGN)
		return kvm_deassign_ioeventfd(kvm, args);

	return kvm_assign_ioeventfd(kvm, args);
}