virtio_pci_common.c 12.8 KB
Newer Older
A
Anthony Liguori 已提交
1
/*
2
 * Virtio PCI driver - common functionality for all device versions
A
Anthony Liguori 已提交
3 4 5 6 7
 *
 * This module allows virtio devices to be used over a virtual PCI device.
 * This can be used with QEMU based VMMs like KVM or Xen.
 *
 * Copyright IBM Corp. 2007
8
 * Copyright Red Hat, Inc. 2014
A
Anthony Liguori 已提交
9 10 11
 *
 * Authors:
 *  Anthony Liguori  <aliguori@us.ibm.com>
12 13
 *  Rusty Russell <rusty@rustcorp.com.au>
 *  Michael S. Tsirkin <mst@redhat.com>
A
Anthony Liguori 已提交
14 15 16 17 18 19
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

20
#include "virtio_pci_common.h"
A
Anthony Liguori 已提交
21

22
/* wait for pending irq handlers */
23
void vp_synchronize_vectors(struct virtio_device *vdev)
24 25 26 27 28 29 30 31 32 33 34
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	int i;

	if (vp_dev->intx_enabled)
		synchronize_irq(vp_dev->pci_dev->irq);

	for (i = 0; i < vp_dev->msix_vectors; ++i)
		synchronize_irq(vp_dev->msix_entries[i].vector);
}

A
Anthony Liguori 已提交
35
/* the notify function used when creating a virt queue */
36
bool vp_notify(struct virtqueue *vq)
A
Anthony Liguori 已提交
37 38 39
{
	/* we write the queue's selector into the notification register to
	 * signal the other end */
40
	iowrite16(vq->index, (void __iomem *)vq->priv);
41
	return true;
A
Anthony Liguori 已提交
42 43
}

44 45 46 47 48
/* Handle a configuration change: Tell driver if it wants to know. */
static irqreturn_t vp_config_changed(int irq, void *opaque)
{
	struct virtio_pci_device *vp_dev = opaque;

49
	virtio_config_changed(&vp_dev->vdev);
50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70
	return IRQ_HANDLED;
}

/* Notify all virtqueues on an interrupt. */
static irqreturn_t vp_vring_interrupt(int irq, void *opaque)
{
	struct virtio_pci_device *vp_dev = opaque;
	struct virtio_pci_vq_info *info;
	irqreturn_t ret = IRQ_NONE;
	unsigned long flags;

	spin_lock_irqsave(&vp_dev->lock, flags);
	list_for_each_entry(info, &vp_dev->virtqueues, node) {
		if (vring_interrupt(irq, info->vq) == IRQ_HANDLED)
			ret = IRQ_HANDLED;
	}
	spin_unlock_irqrestore(&vp_dev->lock, flags);

	return ret;
}

A
Anthony Liguori 已提交
71 72 73 74 75 76 77 78 79 80 81 82 83
/* A small wrapper to also acknowledge the interrupt when it's handled.
 * I really need an EIO hook for the vring so I can ack the interrupt once we
 * know that we'll be handling the IRQ but before we invoke the callback since
 * the callback may notify the host which results in the host attempting to
 * raise an interrupt that we would then mask once we acknowledged the
 * interrupt. */
static irqreturn_t vp_interrupt(int irq, void *opaque)
{
	struct virtio_pci_device *vp_dev = opaque;
	u8 isr;

	/* reading the ISR has the effect of also clearing it so it's very
	 * important to save off the value. */
M
Michael S. Tsirkin 已提交
84
	isr = ioread8(vp_dev->isr);
A
Anthony Liguori 已提交
85 86 87 88 89 90

	/* It's definitely not us if the ISR was not high */
	if (!isr)
		return IRQ_NONE;

	/* Configuration change?  Tell driver if it wants to know. */
91 92
	if (isr & VIRTIO_PCI_ISR_CONFIG)
		vp_config_changed(irq, opaque);
A
Anthony Liguori 已提交
93

94
	return vp_vring_interrupt(irq, opaque);
A
Anthony Liguori 已提交
95 96
}

97 98 99 100 101 102 103 104 105 106 107 108 109
static void vp_free_vectors(struct virtio_device *vdev)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	int i;

	if (vp_dev->intx_enabled) {
		free_irq(vp_dev->pci_dev->irq, vp_dev);
		vp_dev->intx_enabled = 0;
	}

	for (i = 0; i < vp_dev->msix_used_vectors; ++i)
		free_irq(vp_dev->msix_entries[i].vector, vp_dev);

110 111 112 113
	for (i = 0; i < vp_dev->msix_vectors; i++)
		if (vp_dev->msix_affinity_masks[i])
			free_cpumask_var(vp_dev->msix_affinity_masks[i]);

114 115
	if (vp_dev->msix_enabled) {
		/* Disable the vector used for configuration */
116
		vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR);
117 118

		pci_disable_msix(vp_dev->pci_dev);
119
		vp_dev->msix_enabled = 0;
120
	}
121

122
	vp_dev->msix_vectors = 0;
123 124 125 126 127
	vp_dev->msix_used_vectors = 0;
	kfree(vp_dev->msix_names);
	vp_dev->msix_names = NULL;
	kfree(vp_dev->msix_entries);
	vp_dev->msix_entries = NULL;
128 129
	kfree(vp_dev->msix_affinity_masks);
	vp_dev->msix_affinity_masks = NULL;
130 131
}

R
Rusty Russell 已提交
132 133
/*
 * Allocate the MSI-X bookkeeping arrays, enable exactly @nvectors MSI-X
 * vectors and register the configuration-change handler on the first one.
 * When @per_vq_vectors is false a second vector, shared by all virtqueues,
 * is registered as well.
 *
 * Returns 0 on success or a negative errno.  On failure everything set up so
 * far is torn down through vp_free_vectors().
 */
static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
				   bool per_vq_vectors)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	const char *name = dev_name(&vp_dev->vdev.dev);
	unsigned i, v;
	int err = -ENOMEM;

	vp_dev->msix_vectors = nvectors;

	/* kmalloc_array()/kcalloc() fail cleanly if count * size overflows. */
	vp_dev->msix_entries = kmalloc_array(nvectors,
					     sizeof(*vp_dev->msix_entries),
					     GFP_KERNEL);
	if (!vp_dev->msix_entries)
		goto error;
	vp_dev->msix_names = kmalloc_array(nvectors,
					   sizeof(*vp_dev->msix_names),
					   GFP_KERNEL);
	if (!vp_dev->msix_names)
		goto error;
	/* Zeroed so the error path can tell which masks were allocated. */
	vp_dev->msix_affinity_masks
		= kcalloc(nvectors, sizeof(*vp_dev->msix_affinity_masks),
			  GFP_KERNEL);
	if (!vp_dev->msix_affinity_masks)
		goto error;
	for (i = 0; i < nvectors; ++i)
		if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i],
					GFP_KERNEL))
			goto error;

	for (i = 0; i < nvectors; ++i)
		vp_dev->msix_entries[i].entry = i;

	err = pci_enable_msix_exact(vp_dev->pci_dev,
				    vp_dev->msix_entries, nvectors);
	if (err)
		goto error;
	vp_dev->msix_enabled = 1;

	/* Set the vector used for configuration */
	v = vp_dev->msix_used_vectors;
	snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
		 "%s-config", name);
	err = request_irq(vp_dev->msix_entries[v].vector,
			  vp_config_changed, 0, vp_dev->msix_names[v],
			  vp_dev);
	if (err)
		goto error;
	++vp_dev->msix_used_vectors;

	v = vp_dev->config_vector(vp_dev, v);
	/* Verify we had enough resources to assign the vector */
	if (v == VIRTIO_MSI_NO_VECTOR) {
		err = -EBUSY;
		goto error;
	}

	if (!per_vq_vectors) {
		/* Shared vector for all VQs */
		v = vp_dev->msix_used_vectors;
		snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
			 "%s-virtqueues", name);
		err = request_irq(vp_dev->msix_entries[v].vector,
				  vp_vring_interrupt, 0, vp_dev->msix_names[v],
				  vp_dev);
		if (err)
			goto error;
		++vp_dev->msix_used_vectors;
	}
	return 0;
error:
	vp_free_vectors(vdev);
	return err;
}

R
Rusty Russell 已提交
205 206 207 208 209 210 211 212 213 214 215 216
/*
 * Register vp_interrupt() as a shared handler on the device's legacy PCI
 * interrupt line and record that INTx is in use.
 * Returns 0 on success or the error from request_irq().
 */
static int vp_request_intx(struct virtio_device *vdev)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	int rc = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED,
			     dev_name(&vdev->dev), vp_dev);

	if (rc)
		return rc;

	vp_dev->intx_enabled = 1;
	return 0;
}

217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235
/*
 * Create virtqueue @index through the device-specific setup_vq hook and
 * record its bookkeeping structure in vp_dev->vqs[@index].
 *
 * Queues with a callback are linked onto vp_dev->virtqueues so the shared
 * interrupt handler can find them; queues without one only get their list
 * node initialised.
 *
 * Returns the new virtqueue, or an ERR_PTR() on failure.
 */
static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned index,
				     void (*callback)(struct virtqueue *vq),
				     const char *name,
				     u16 msix_vec)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	struct virtio_pci_vq_info *vq_info;
	struct virtqueue *new_vq;
	unsigned long irq_flags;

	/* bookkeeping structure that represents an active queue */
	vq_info = kmalloc(sizeof(*vq_info), GFP_KERNEL);
	if (!vq_info)
		return ERR_PTR(-ENOMEM);

	new_vq = vp_dev->setup_vq(vp_dev, vq_info, index, callback, name,
				  msix_vec);
	if (IS_ERR(new_vq)) {
		kfree(vq_info);
		return new_vq;
	}

	vq_info->vq = new_vq;
	if (!callback) {
		INIT_LIST_HEAD(&vq_info->node);
	} else {
		spin_lock_irqsave(&vp_dev->lock, irq_flags);
		list_add(&vq_info->node, &vp_dev->virtqueues);
		spin_unlock_irqrestore(&vp_dev->lock, irq_flags);
	}

	vp_dev->vqs[index] = vq_info;
	return new_vq;
}

252 253 254 255 256 257 258 259 260 261 262
/*
 * Tear down a single virtqueue: unlink its bookkeeping entry under
 * vp_dev->lock, let the device-specific del_vq hook release the queue, then
 * free the entry itself.
 */
static void vp_del_vq(struct virtqueue *vq)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
	struct virtio_pci_vq_info *vq_info = vp_dev->vqs[vq->index];
	unsigned long irq_flags;

	spin_lock_irqsave(&vp_dev->lock, irq_flags);
	list_del(&vq_info->node);
	spin_unlock_irqrestore(&vp_dev->lock, irq_flags);

	vp_dev->del_vq(vq_info);

	kfree(vq_info);
}

266
/* the config->del_vqs() implementation */
267
void vp_del_vqs(struct virtio_device *vdev)
268
{
M
Michael S. Tsirkin 已提交
269
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
270
	struct virtqueue *vq, *n;
M
Michael S. Tsirkin 已提交
271
	struct virtio_pci_vq_info *info;
272

M
Michael S. Tsirkin 已提交
273
	list_for_each_entry_safe(vq, n, &vdev->vqs, list) {
M
Michael S. Tsirkin 已提交
274
		info = vp_dev->vqs[vq->index];
275 276
		if (vp_dev->per_vq_vectors &&
			info->msix_vector != VIRTIO_MSI_NO_VECTOR)
R
Rusty Russell 已提交
277 278
			free_irq(vp_dev->msix_entries[info->msix_vector].vector,
				 vq);
279
		vp_del_vq(vq);
M
Michael S. Tsirkin 已提交
280 281
	}
	vp_dev->per_vq_vectors = false;
282 283

	vp_free_vectors(vdev);
M
Michael S. Tsirkin 已提交
284
	kfree(vp_dev->vqs);
285 286
}

M
Michael S. Tsirkin 已提交
287 288 289 290
static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs,
			      struct virtqueue *vqs[],
			      vq_callback_t *callbacks[],
			      const char *names[],
R
Rusty Russell 已提交
291
			      bool use_msix,
M
Michael S. Tsirkin 已提交
292
			      bool per_vq_vectors)
293
{
M
Michael S. Tsirkin 已提交
294
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
R
Rusty Russell 已提交
295 296
	u16 msix_vec;
	int i, err, nvectors, allocated_vectors;
297

M
Michael S. Tsirkin 已提交
298 299 300 301
	vp_dev->vqs = kmalloc(nvqs * sizeof *vp_dev->vqs, GFP_KERNEL);
	if (!vp_dev->vqs)
		return -ENOMEM;

R
Rusty Russell 已提交
302 303 304 305
	if (!use_msix) {
		/* Old style: one normal interrupt for change and all vqs. */
		err = vp_request_intx(vdev);
		if (err)
M
Michael S. Tsirkin 已提交
306
			goto error_find;
R
Rusty Russell 已提交
307 308 309 310 311 312 313 314 315 316 317 318 319 320
	} else {
		if (per_vq_vectors) {
			/* Best option: one for change interrupt, one per vq. */
			nvectors = 1;
			for (i = 0; i < nvqs; ++i)
				if (callbacks[i])
					++nvectors;
		} else {
			/* Second best: one for change, shared for all vqs. */
			nvectors = 2;
		}

		err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors);
		if (err)
M
Michael S. Tsirkin 已提交
321
			goto error_find;
R
Rusty Russell 已提交
322
	}
323

M
Michael S. Tsirkin 已提交
324 325
	vp_dev->per_vq_vectors = per_vq_vectors;
	allocated_vectors = vp_dev->msix_used_vectors;
326
	for (i = 0; i < nvqs; ++i) {
327 328 329 330
		if (!names[i]) {
			vqs[i] = NULL;
			continue;
		} else if (!callbacks[i] || !vp_dev->msix_enabled)
R
Rusty Russell 已提交
331
			msix_vec = VIRTIO_MSI_NO_VECTOR;
M
Michael S. Tsirkin 已提交
332
		else if (vp_dev->per_vq_vectors)
R
Rusty Russell 已提交
333
			msix_vec = allocated_vectors++;
M
Michael S. Tsirkin 已提交
334
		else
R
Rusty Russell 已提交
335
			msix_vec = VP_MSIX_VQ_VECTOR;
336
		vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i], msix_vec);
M
Michael S. Tsirkin 已提交
337 338
		if (IS_ERR(vqs[i])) {
			err = PTR_ERR(vqs[i]);
339
			goto error_find;
M
Michael S. Tsirkin 已提交
340
		}
341 342 343 344

		if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR)
			continue;

M
Michael S. Tsirkin 已提交
345
		/* allocate per-vq irq if available and necessary */
346 347 348 349 350 351 352 353 354 355 356
		snprintf(vp_dev->msix_names[msix_vec],
			 sizeof *vp_dev->msix_names,
			 "%s-%s",
			 dev_name(&vp_dev->vdev.dev), names[i]);
		err = request_irq(vp_dev->msix_entries[msix_vec].vector,
				  vring_interrupt, 0,
				  vp_dev->msix_names[msix_vec],
				  vqs[i]);
		if (err) {
			vp_del_vq(vqs[i]);
			goto error_find;
M
Michael S. Tsirkin 已提交
357
		}
358 359 360
	}
	return 0;

361
error_find:
362
	vp_del_vqs(vdev);
M
Michael S. Tsirkin 已提交
363 364 365 366
	return err;
}

/* the config->find_vqs() implementation */
367 368 369 370
int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
		struct virtqueue *vqs[],
		vq_callback_t *callbacks[],
		const char *names[])
M
Michael S. Tsirkin 已提交
371
{
R
Rusty Russell 已提交
372
	int err;
M
Michael S. Tsirkin 已提交
373

R
Rusty Russell 已提交
374 375
	/* Try MSI-X with one vector per queue. */
	err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, true, true);
M
Michael S. Tsirkin 已提交
376 377
	if (!err)
		return 0;
R
Rusty Russell 已提交
378
	/* Fallback: MSI-X with one vector for config, one shared for queues. */
M
Michael S. Tsirkin 已提交
379
	err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
R
Rusty Russell 已提交
380
				 true, false);
M
Michael S. Tsirkin 已提交
381 382 383
	if (!err)
		return 0;
	/* Finally fall back to regular interrupts. */
R
Rusty Russell 已提交
384 385
	return vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
				  false, false);
386 387
}

388
const char *vp_bus_name(struct virtio_device *vdev)
389 390 391 392 393 394
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);

	return pci_name(vp_dev->pci_dev);
}

395 396 397 398 399
/* Setup the affinity for a virtqueue:
 * - force the affinity for per vq vector
 * - OR over all affinities for shared MSI
 * - ignore the affinity request if we're using INTX
 */
400
int vp_set_vq_affinity(struct virtqueue *vq, int cpu)
401 402 403
{
	struct virtio_device *vdev = vq->vdev;
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
M
Michael S. Tsirkin 已提交
404
	struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index];
405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423
	struct cpumask *mask;
	unsigned int irq;

	if (!vq->callback)
		return -EINVAL;

	if (vp_dev->msix_enabled) {
		mask = vp_dev->msix_affinity_masks[info->msix_vector];
		irq = vp_dev->msix_entries[info->msix_vector].vector;
		if (cpu == -1)
			irq_set_affinity_hint(irq, NULL);
		else {
			cpumask_set_cpu(cpu, mask);
			irq_set_affinity_hint(irq, mask);
		}
	}
	return 0;
}

424
/*
 * Deliberately empty release callback.  Devices are allocated and freed
 * together with their PCI devices, so there is nothing to do here; the
 * function exists only so the core does not warn about a missing release
 * method.
 */
void virtio_pci_release_dev(struct device *_d)
{
}

433
#ifdef CONFIG_PM_SLEEP
434 435 436 437 438 439
static int virtio_pci_freeze(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
	int ret;

440
	ret = virtio_device_freeze(&vp_dev->vdev);
441 442 443 444 445 446

	if (!ret)
		pci_disable_device(pci_dev);
	return ret;
}

A
Amit Shah 已提交
447
static int virtio_pci_restore(struct device *dev)
448 449 450 451 452 453 454 455
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
	int ret;

	ret = pci_enable_device(pci_dev);
	if (ret)
		return ret;
A
Amit Shah 已提交
456

457
	pci_set_master(pci_dev);
458
	return virtio_device_restore(&vp_dev->vdev);
459 460
}

461
static const struct dev_pm_ops virtio_pci_pm_ops = {
462
	SET_SYSTEM_SLEEP_PM_OPS(virtio_pci_freeze, virtio_pci_restore)
A
Amit Shah 已提交
463
};
A
Anthony Liguori 已提交
464
#endif
465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496


/* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
/* The table matches any device ID under vendor 0x1af4. */
static const struct pci_device_id virtio_pci_id_table[] = {
	{ PCI_DEVICE(0x1af4, PCI_ANY_ID) },
	{ 0 }	/* zeroed terminating entry */
};

/* Register the ID table with MODULE_DEVICE_TABLE for the "pci" bus type. */
MODULE_DEVICE_TABLE(pci, virtio_pci_id_table);

/*
 * PCI probe callback: hands the device straight to the legacy probe routine
 * and returns its result.
 */
static int virtio_pci_probe(struct pci_dev *pci_dev,
			    const struct pci_device_id *id)
{
	int rc = virtio_pci_legacy_probe(pci_dev, id);

	return rc;
}

/* PCI remove callback: delegates to the legacy remove routine. */
static void virtio_pci_remove(struct pci_dev *pci_dev)
{
	virtio_pci_legacy_remove(pci_dev);
}

/*
 * PCI driver description: binds to the devices listed in virtio_pci_id_table
 * and routes probe/remove to the wrappers above.  The PM callbacks are only
 * attached when CONFIG_PM_SLEEP is set.
 */
static struct pci_driver virtio_pci_driver = {
	.name		= "virtio-pci",
	.id_table	= virtio_pci_id_table,
	.probe		= virtio_pci_probe,
	.remove		= virtio_pci_remove,
#ifdef CONFIG_PM_SLEEP
	.driver.pm	= &virtio_pci_pm_ops,
#endif
};

/* Hand the driver description to the module_pci_driver() helper macro. */
module_pci_driver(virtio_pci_driver);
H
Herbert Xu 已提交
497 498 499 500 501

/* Module metadata. */
MODULE_AUTHOR("Anthony Liguori <aliguori@us.ibm.com>");
MODULE_DESCRIPTION("virtio-pci");
MODULE_LICENSE("GPL");
MODULE_VERSION("1");