/*
 * Virtio PCI driver - common functionality for all device versions
 *
 * This module allows virtio devices to be used over a virtual PCI device.
 * This can be used with QEMU based VMMs like KVM or Xen.
 *
 * Copyright IBM Corp. 2007
 * Copyright Red Hat, Inc. 2014
 *
 * Authors:
 *  Anthony Liguori  <aliguori@us.ibm.com>
 *  Rusty Russell <rusty@rustcorp.com.au>
 *  Michael S. Tsirkin <mst@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "virtio_pci_common.h"

/* wait for pending irq handlers */
23
void vp_synchronize_vectors(struct virtio_device *vdev)
24 25 26 27 28 29 30 31 32 33 34
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	int i;

	if (vp_dev->intx_enabled)
		synchronize_irq(vp_dev->pci_dev->irq);

	for (i = 0; i < vp_dev->msix_vectors; ++i)
		synchronize_irq(vp_dev->msix_entries[i].vector);
}

A
Anthony Liguori 已提交
35
/* the notify function used when creating a virt queue */
36
bool vp_notify(struct virtqueue *vq)
A
Anthony Liguori 已提交
37 38 39
{
	/* we write the queue's selector into the notification register to
	 * signal the other end */
40
	iowrite16(vq->index, (void __iomem *)vq->priv);
41
	return true;
A
Anthony Liguori 已提交
42 43
}

44 45 46 47 48
/* Handle a configuration change: Tell driver if it wants to know. */
static irqreturn_t vp_config_changed(int irq, void *opaque)
{
	struct virtio_pci_device *vp_dev = opaque;

49
	virtio_config_changed(&vp_dev->vdev);
50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70
	return IRQ_HANDLED;
}

/* Notify all virtqueues on an interrupt. */
static irqreturn_t vp_vring_interrupt(int irq, void *opaque)
{
	struct virtio_pci_device *vp_dev = opaque;
	struct virtio_pci_vq_info *info;
	irqreturn_t ret = IRQ_NONE;
	unsigned long flags;

	spin_lock_irqsave(&vp_dev->lock, flags);
	list_for_each_entry(info, &vp_dev->virtqueues, node) {
		if (vring_interrupt(irq, info->vq) == IRQ_HANDLED)
			ret = IRQ_HANDLED;
	}
	spin_unlock_irqrestore(&vp_dev->lock, flags);

	return ret;
}

A
Anthony Liguori 已提交
71 72 73 74 75 76 77 78 79 80 81 82 83
/* A small wrapper to also acknowledge the interrupt when it's handled.
 * I really need an EIO hook for the vring so I can ack the interrupt once we
 * know that we'll be handling the IRQ but before we invoke the callback since
 * the callback may notify the host which results in the host attempting to
 * raise an interrupt that we would then mask once we acknowledged the
 * interrupt. */
static irqreturn_t vp_interrupt(int irq, void *opaque)
{
	struct virtio_pci_device *vp_dev = opaque;
	u8 isr;

	/* reading the ISR has the effect of also clearing it so it's very
	 * important to save off the value. */
M
Michael S. Tsirkin 已提交
84
	isr = ioread8(vp_dev->isr);
A
Anthony Liguori 已提交
85 86 87 88 89 90

	/* It's definitely not us if the ISR was not high */
	if (!isr)
		return IRQ_NONE;

	/* Configuration change?  Tell driver if it wants to know. */
91 92
	if (isr & VIRTIO_PCI_ISR_CONFIG)
		vp_config_changed(irq, opaque);
A
Anthony Liguori 已提交
93

94
	return vp_vring_interrupt(irq, opaque);
A
Anthony Liguori 已提交
95 96
}

97 98 99 100 101 102 103 104 105 106 107 108 109
static void vp_free_vectors(struct virtio_device *vdev)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	int i;

	if (vp_dev->intx_enabled) {
		free_irq(vp_dev->pci_dev->irq, vp_dev);
		vp_dev->intx_enabled = 0;
	}

	for (i = 0; i < vp_dev->msix_used_vectors; ++i)
		free_irq(vp_dev->msix_entries[i].vector, vp_dev);

110 111 112 113
	for (i = 0; i < vp_dev->msix_vectors; i++)
		if (vp_dev->msix_affinity_masks[i])
			free_cpumask_var(vp_dev->msix_affinity_masks[i]);

114 115
	if (vp_dev->msix_enabled) {
		/* Disable the vector used for configuration */
116
		vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR);
117 118

		pci_disable_msix(vp_dev->pci_dev);
119
		vp_dev->msix_enabled = 0;
120
	}
121

122
	vp_dev->msix_vectors = 0;
123 124 125 126 127
	vp_dev->msix_used_vectors = 0;
	kfree(vp_dev->msix_names);
	vp_dev->msix_names = NULL;
	kfree(vp_dev->msix_entries);
	vp_dev->msix_entries = NULL;
128 129
	kfree(vp_dev->msix_affinity_masks);
	vp_dev->msix_affinity_masks = NULL;
130 131
}

R
Rusty Russell 已提交
132 133
static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
				   bool per_vq_vectors)
134 135 136 137 138
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	const char *name = dev_name(&vp_dev->vdev.dev);
	unsigned i, v;
	int err = -ENOMEM;
M
Michael S. Tsirkin 已提交
139

140 141
	vp_dev->msix_vectors = nvectors;

142 143 144
	vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries,
				       GFP_KERNEL);
	if (!vp_dev->msix_entries)
145
		goto error;
146 147 148
	vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names,
				     GFP_KERNEL);
	if (!vp_dev->msix_names)
149
		goto error;
150 151 152 153 154 155 156 157 158
	vp_dev->msix_affinity_masks
		= kzalloc(nvectors * sizeof *vp_dev->msix_affinity_masks,
			  GFP_KERNEL);
	if (!vp_dev->msix_affinity_masks)
		goto error;
	for (i = 0; i < nvectors; ++i)
		if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i],
					GFP_KERNEL))
			goto error;
159 160 161 162

	for (i = 0; i < nvectors; ++i)
		vp_dev->msix_entries[i].entry = i;

163 164
	err = pci_enable_msix_exact(vp_dev->pci_dev,
				    vp_dev->msix_entries, nvectors);
M
Michael S. Tsirkin 已提交
165 166 167 168 169 170 171 172 173 174 175 176 177 178
	if (err)
		goto error;
	vp_dev->msix_enabled = 1;

	/* Set the vector used for configuration */
	v = vp_dev->msix_used_vectors;
	snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
		 "%s-config", name);
	err = request_irq(vp_dev->msix_entries[v].vector,
			  vp_config_changed, 0, vp_dev->msix_names[v],
			  vp_dev);
	if (err)
		goto error;
	++vp_dev->msix_used_vectors;
179

180
	v = vp_dev->config_vector(vp_dev, v);
M
Michael S. Tsirkin 已提交
181 182 183 184
	/* Verify we had enough resources to assign the vector */
	if (v == VIRTIO_MSI_NO_VECTOR) {
		err = -EBUSY;
		goto error;
185 186
	}

M
Michael S. Tsirkin 已提交
187
	if (!per_vq_vectors) {
188 189 190 191 192 193 194 195
		/* Shared vector for all VQs */
		v = vp_dev->msix_used_vectors;
		snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
			 "%s-virtqueues", name);
		err = request_irq(vp_dev->msix_entries[v].vector,
				  vp_vring_interrupt, 0, vp_dev->msix_names[v],
				  vp_dev);
		if (err)
196
			goto error;
197 198 199
		++vp_dev->msix_used_vectors;
	}
	return 0;
200
error:
201 202 203 204
	vp_free_vectors(vdev);
	return err;
}

R
Rusty Russell 已提交
205 206 207 208 209 210 211 212 213 214 215 216
static int vp_request_intx(struct virtio_device *vdev)
{
	int err;
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);

	err = request_irq(vp_dev->pci_dev->irq, vp_interrupt,
			  IRQF_SHARED, dev_name(&vdev->dev), vp_dev);
	if (!err)
		vp_dev->intx_enabled = 1;
	return err;
}

217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235
static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned index,
				     void (*callback)(struct virtqueue *vq),
				     const char *name,
				     u16 msix_vec)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	struct virtio_pci_vq_info *info = kmalloc(sizeof *info, GFP_KERNEL);
	struct virtqueue *vq;
	unsigned long flags;

	/* fill out our structure that represents an active queue */
	if (!info)
		return ERR_PTR(-ENOMEM);

	vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, msix_vec);
	if (IS_ERR(vq))
		goto out_info;

	info->vq = vq;
236 237 238 239 240 241 242
	if (callback) {
		spin_lock_irqsave(&vp_dev->lock, flags);
		list_add(&info->node, &vp_dev->virtqueues);
		spin_unlock_irqrestore(&vp_dev->lock, flags);
	} else {
		INIT_LIST_HEAD(&info->node);
	}
A
Anthony Liguori 已提交
243

M
Michael S. Tsirkin 已提交
244
	vp_dev->vqs[index] = info;
A
Anthony Liguori 已提交
245 246 247 248
	return vq;

out_info:
	kfree(info);
249
	return vq;
A
Anthony Liguori 已提交
250 251
}

252 253 254 255 256 257 258 259 260 261 262
static void vp_del_vq(struct virtqueue *vq)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
	struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index];
	unsigned long flags;

	spin_lock_irqsave(&vp_dev->lock, flags);
	list_del(&info->node);
	spin_unlock_irqrestore(&vp_dev->lock, flags);

	vp_dev->del_vq(info);
A
Anthony Liguori 已提交
263 264 265
	kfree(info);
}

266
/* the config->del_vqs() implementation */
267
void vp_del_vqs(struct virtio_device *vdev)
268
{
M
Michael S. Tsirkin 已提交
269
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
270
	struct virtqueue *vq, *n;
M
Michael S. Tsirkin 已提交
271
	struct virtio_pci_vq_info *info;
272

M
Michael S. Tsirkin 已提交
273
	list_for_each_entry_safe(vq, n, &vdev->vqs, list) {
M
Michael S. Tsirkin 已提交
274
		info = vp_dev->vqs[vq->index];
275 276
		if (vp_dev->per_vq_vectors &&
			info->msix_vector != VIRTIO_MSI_NO_VECTOR)
R
Rusty Russell 已提交
277 278
			free_irq(vp_dev->msix_entries[info->msix_vector].vector,
				 vq);
279
		vp_del_vq(vq);
M
Michael S. Tsirkin 已提交
280 281
	}
	vp_dev->per_vq_vectors = false;
282 283

	vp_free_vectors(vdev);
M
Michael S. Tsirkin 已提交
284
	kfree(vp_dev->vqs);
285
	vp_dev->vqs = NULL;
286 287
}

M
Michael S. Tsirkin 已提交
288 289 290 291
static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs,
			      struct virtqueue *vqs[],
			      vq_callback_t *callbacks[],
			      const char *names[],
R
Rusty Russell 已提交
292
			      bool use_msix,
M
Michael S. Tsirkin 已提交
293
			      bool per_vq_vectors)
294
{
M
Michael S. Tsirkin 已提交
295
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
R
Rusty Russell 已提交
296 297
	u16 msix_vec;
	int i, err, nvectors, allocated_vectors;
298

M
Michael S. Tsirkin 已提交
299 300 301 302
	vp_dev->vqs = kmalloc(nvqs * sizeof *vp_dev->vqs, GFP_KERNEL);
	if (!vp_dev->vqs)
		return -ENOMEM;

R
Rusty Russell 已提交
303 304 305 306
	if (!use_msix) {
		/* Old style: one normal interrupt for change and all vqs. */
		err = vp_request_intx(vdev);
		if (err)
M
Michael S. Tsirkin 已提交
307
			goto error_find;
R
Rusty Russell 已提交
308 309 310 311 312 313 314 315 316 317 318 319 320 321
	} else {
		if (per_vq_vectors) {
			/* Best option: one for change interrupt, one per vq. */
			nvectors = 1;
			for (i = 0; i < nvqs; ++i)
				if (callbacks[i])
					++nvectors;
		} else {
			/* Second best: one for change, shared for all vqs. */
			nvectors = 2;
		}

		err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors);
		if (err)
M
Michael S. Tsirkin 已提交
322
			goto error_find;
R
Rusty Russell 已提交
323
	}
324

M
Michael S. Tsirkin 已提交
325 326
	vp_dev->per_vq_vectors = per_vq_vectors;
	allocated_vectors = vp_dev->msix_used_vectors;
327
	for (i = 0; i < nvqs; ++i) {
328 329 330 331
		if (!names[i]) {
			vqs[i] = NULL;
			continue;
		} else if (!callbacks[i] || !vp_dev->msix_enabled)
R
Rusty Russell 已提交
332
			msix_vec = VIRTIO_MSI_NO_VECTOR;
M
Michael S. Tsirkin 已提交
333
		else if (vp_dev->per_vq_vectors)
R
Rusty Russell 已提交
334
			msix_vec = allocated_vectors++;
M
Michael S. Tsirkin 已提交
335
		else
R
Rusty Russell 已提交
336
			msix_vec = VP_MSIX_VQ_VECTOR;
337
		vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i], msix_vec);
M
Michael S. Tsirkin 已提交
338 339
		if (IS_ERR(vqs[i])) {
			err = PTR_ERR(vqs[i]);
340
			goto error_find;
M
Michael S. Tsirkin 已提交
341
		}
342 343 344 345

		if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR)
			continue;

M
Michael S. Tsirkin 已提交
346
		/* allocate per-vq irq if available and necessary */
347 348 349 350 351 352 353 354 355 356 357
		snprintf(vp_dev->msix_names[msix_vec],
			 sizeof *vp_dev->msix_names,
			 "%s-%s",
			 dev_name(&vp_dev->vdev.dev), names[i]);
		err = request_irq(vp_dev->msix_entries[msix_vec].vector,
				  vring_interrupt, 0,
				  vp_dev->msix_names[msix_vec],
				  vqs[i]);
		if (err) {
			vp_del_vq(vqs[i]);
			goto error_find;
M
Michael S. Tsirkin 已提交
358
		}
359 360 361
	}
	return 0;

362
error_find:
363
	vp_del_vqs(vdev);
M
Michael S. Tsirkin 已提交
364 365 366 367
	return err;
}

/* the config->find_vqs() implementation */
368 369 370 371
int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
		struct virtqueue *vqs[],
		vq_callback_t *callbacks[],
		const char *names[])
M
Michael S. Tsirkin 已提交
372
{
R
Rusty Russell 已提交
373
	int err;
M
Michael S. Tsirkin 已提交
374

R
Rusty Russell 已提交
375 376
	/* Try MSI-X with one vector per queue. */
	err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, true, true);
M
Michael S. Tsirkin 已提交
377 378
	if (!err)
		return 0;
R
Rusty Russell 已提交
379
	/* Fallback: MSI-X with one vector for config, one shared for queues. */
M
Michael S. Tsirkin 已提交
380
	err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
R
Rusty Russell 已提交
381
				 true, false);
M
Michael S. Tsirkin 已提交
382 383 384
	if (!err)
		return 0;
	/* Finally fall back to regular interrupts. */
R
Rusty Russell 已提交
385 386
	return vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
				  false, false);
387 388
}

389
const char *vp_bus_name(struct virtio_device *vdev)
390 391 392 393 394 395
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);

	return pci_name(vp_dev->pci_dev);
}

396 397 398 399 400
/* Setup the affinity for a virtqueue:
 * - force the affinity for per vq vector
 * - OR over all affinities for shared MSI
 * - ignore the affinity request if we're using INTX
 */
401
int vp_set_vq_affinity(struct virtqueue *vq, int cpu)
402 403 404
{
	struct virtio_device *vdev = vq->vdev;
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
M
Michael S. Tsirkin 已提交
405
	struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index];
406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424
	struct cpumask *mask;
	unsigned int irq;

	if (!vq->callback)
		return -EINVAL;

	if (vp_dev->msix_enabled) {
		mask = vp_dev->msix_affinity_masks[info->msix_vector];
		irq = vp_dev->msix_entries[info->msix_vector].vector;
		if (cpu == -1)
			irq_set_affinity_hint(irq, NULL);
		else {
			cpumask_set_cpu(cpu, mask);
			irq_set_affinity_hint(irq, mask);
		}
	}
	return 0;
}

#ifdef CONFIG_PM_SLEEP
/* System-sleep freeze: quiesce the virtio device, then the PCI device. */
static int virtio_pci_freeze(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
	int ret = virtio_device_freeze(&vp_dev->vdev);

	if (!ret)
		pci_disable_device(pci_dev);
	return ret;
}

A
Amit Shah 已提交
439
static int virtio_pci_restore(struct device *dev)
440 441 442 443 444 445 446 447
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
	int ret;

	ret = pci_enable_device(pci_dev);
	if (ret)
		return ret;
A
Amit Shah 已提交
448

449
	pci_set_master(pci_dev);
450
	return virtio_device_restore(&vp_dev->vdev);
451 452
}

453
static const struct dev_pm_ops virtio_pci_pm_ops = {
454
	SET_SYSTEM_SLEEP_PM_OPS(virtio_pci_freeze, virtio_pci_restore)
A
Amit Shah 已提交
455
};
A
Anthony Liguori 已提交
456
#endif
457 458 459 460 461 462 463 464 465 466


/* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
static const struct pci_device_id virtio_pci_id_table[] = {
	{ PCI_DEVICE(0x1af4, PCI_ANY_ID) },
	{ 0 }
};

MODULE_DEVICE_TABLE(pci, virtio_pci_id_table);

/* Release callback for the embedded struct device. */
static void virtio_pci_release_dev(struct device *_d)
{
	struct virtio_device *vdev = dev_to_virtio(_d);
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);

	/* As struct device is a kobject, it's not safe to free the memory
	 * (including the reference counter itself) until its release
	 * callback runs. */
	kfree(vp_dev);
}

478 479 480
static int virtio_pci_probe(struct pci_dev *pci_dev,
			    const struct pci_device_id *id)
{
481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528
	struct virtio_pci_device *vp_dev;
	int rc;

	/* allocate our structure and fill it out */
	vp_dev = kzalloc(sizeof(struct virtio_pci_device), GFP_KERNEL);
	if (!vp_dev)
		return -ENOMEM;

	pci_set_drvdata(pci_dev, vp_dev);
	vp_dev->vdev.dev.parent = &pci_dev->dev;
	vp_dev->vdev.dev.release = virtio_pci_release_dev;
	vp_dev->pci_dev = pci_dev;
	INIT_LIST_HEAD(&vp_dev->virtqueues);
	spin_lock_init(&vp_dev->lock);

	/* Disable MSI/MSIX to bring device to a known good state. */
	pci_msi_off(pci_dev);

	/* enable the device */
	rc = pci_enable_device(pci_dev);
	if (rc)
		goto err_enable_device;

	rc = pci_request_regions(pci_dev, "virtio-pci");
	if (rc)
		goto err_request_regions;

	rc = virtio_pci_legacy_probe(vp_dev);
	if (rc)
		goto err_probe;

	pci_set_master(pci_dev);

	rc = register_virtio_device(&vp_dev->vdev);
	if (rc)
		goto err_register;

	return 0;

err_register:
	virtio_pci_legacy_remove(vp_dev);
err_probe:
	pci_release_regions(pci_dev);
err_request_regions:
	pci_disable_device(pci_dev);
err_enable_device:
	kfree(vp_dev);
	return rc;
529 530 531 532
}

static void virtio_pci_remove(struct pci_dev *pci_dev)
{
533 534 535 536 537 538 539 540
	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);

	unregister_virtio_device(&vp_dev->vdev);

	virtio_pci_legacy_remove(pci_dev);

	pci_release_regions(pci_dev);
	pci_disable_device(pci_dev);
541 542 543 544 545 546 547 548 549 550 551 552 553
}

/* PCI driver glue; PM callbacks are only wired up when CONFIG_PM_SLEEP
 * is set. */
static struct pci_driver virtio_pci_driver = {
	.name		= "virtio-pci",
	.id_table	= virtio_pci_id_table,
	.probe		= virtio_pci_probe,
	.remove		= virtio_pci_remove,
#ifdef CONFIG_PM_SLEEP
	.driver.pm	= &virtio_pci_pm_ops,
#endif
};

module_pci_driver(virtio_pci_driver);

/* Standard module metadata. */
MODULE_AUTHOR("Anthony Liguori <aliguori@us.ibm.com>");
MODULE_DESCRIPTION("virtio-pci");
MODULE_LICENSE("GPL");
MODULE_VERSION("1");