提交 3a1d5384 编写于 作者: L Linus Torvalds

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost

Pull virtio, vhost updates from Michael Tsirkin:
 "Fixes, features, performance:

   - new iommu device

   - vhost guest memory access using vmap (just meta-data for now)

   - minor fixes"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
  virtio-mmio: add error check for platform_get_irq
  scsi: virtio_scsi: Use struct_size() helper
  iommu/virtio: Add event queue
  iommu/virtio: Add probe request
  iommu: Add virtio-iommu driver
  PCI: OF: Initialize dev->fwnode appropriately
  of: Allow the iommu-map property to omit untranslated devices
  dt-bindings: virtio: Add virtio-pci-iommu node
  dt-bindings: virtio-mmio: Add IOMMU description
  vhost: fix clang build warning
  vhost: access vq metadata through kernel virtual address
  vhost: factor out setting vring addr and num
  vhost: introduce helpers to get the size of metadata area
  vhost: rename vq_iotlb_prefetch() to vq_meta_prefetch()
  vhost: fine grain userspace memory accessors
  vhost: generalize adding used elem
* virtio IOMMU PCI device
When virtio-iommu uses the PCI transport, its programming interface is
discovered dynamically by the PCI probing infrastructure. However the
device tree statically describes the relation between IOMMU and DMA
masters. Therefore, the PCI root complex that hosts the virtio-iommu
contains a child node representing the IOMMU device explicitly.
Required properties:
- compatible: Should be "virtio,pci-iommu"
- reg: PCI address of the IOMMU. As defined in the PCI Bus
Binding reference [1], the reg property is a five-cell
address encoded as (phys.hi phys.mid phys.lo size.hi
size.lo). phys.hi should contain the device's BDF as
0b00000000 bbbbbbbb dddddfff 00000000. The other cells
should be zero.
- #iommu-cells: Each platform DMA master managed by the IOMMU is assigned
an endpoint ID, described by the "iommus" property [2].
For virtio-iommu, #iommu-cells must be 1.
Notes:
- DMA from the IOMMU device isn't managed by another IOMMU. Therefore the
virtio-iommu node doesn't have an "iommus" property, and is omitted from
the iommu-map property of the root complex.
Example:
pcie@10000000 {
compatible = "pci-host-ecam-generic";
...
/* The IOMMU programming interface uses slot 00:01.0 */
iommu0: iommu@0008 {
compatible = "virtio,pci-iommu";
reg = <0x00000800 0 0 0 0>;
#iommu-cells = <1>;
};
/*
* The IOMMU manages all functions in this PCI domain except
* itself. Omit BDF 00:01.0.
*/
iommu-map = <0x0 &iommu0 0x0 0x8>
<0x9 &iommu0 0x9 0xfff7>;
};
pcie@20000000 {
compatible = "pci-host-ecam-generic";
...
/*
* The IOMMU also manages all functions from this domain,
* with endpoint IDs 0x10000 - 0x1ffff
*/
iommu-map = <0x0 &iommu0 0x10000 0x10000>;
};
ethernet@fe001000 {
...
/* The IOMMU manages this platform device with endpoint ID 0x20000 */
iommus = <&iommu0 0x20000>;
};
[1] Documentation/devicetree/bindings/pci/pci.txt
[2] Documentation/devicetree/bindings/iommu/iommu.txt
...@@ -8,10 +8,40 @@ Required properties: ...@@ -8,10 +8,40 @@ Required properties:
- reg: control registers base address and size including configuration space - reg: control registers base address and size including configuration space
- interrupts: interrupt generated by the device - interrupts: interrupt generated by the device
Required properties for virtio-iommu:
- #iommu-cells: When the node corresponds to a virtio-iommu device, it is
linked to DMA masters using the "iommus" or "iommu-map"
properties [1][2]. #iommu-cells specifies the size of the
"iommus" property. For virtio-iommu #iommu-cells must be
1, each cell describing a single endpoint ID.
Optional properties:
- iommus: If the device accesses memory through an IOMMU, it should
have an "iommus" property [1]. Since virtio-iommu itself
does not access memory through an IOMMU, the "virtio,mmio"
node cannot have both an "#iommu-cells" and an "iommus"
property.
Example: Example:
virtio_block@3000 { virtio_block@3000 {
compatible = "virtio,mmio"; compatible = "virtio,mmio";
reg = <0x3000 0x100>; reg = <0x3000 0x100>;
interrupts = <41>; interrupts = <41>;
/* Device has endpoint ID 23 */
iommus = <&viommu 23>;
} }
viommu: iommu@3100 {
compatible = "virtio,mmio";
reg = <0x3100 0x100>;
interrupts = <42>;
#iommu-cells = <1>;
};
[1] Documentation/devicetree/bindings/iommu/iommu.txt
[2] Documentation/devicetree/bindings/pci/pci-iommu.txt
...@@ -17107,6 +17107,13 @@ S: Maintained ...@@ -17107,6 +17107,13 @@ S: Maintained
F: drivers/virtio/virtio_input.c F: drivers/virtio/virtio_input.c
F: include/uapi/linux/virtio_input.h F: include/uapi/linux/virtio_input.h
VIRTIO IOMMU DRIVER
M: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
L: virtualization@lists.linux-foundation.org
S: Maintained
F: drivers/iommu/virtio-iommu.c
F: include/uapi/linux/virtio_iommu.h
VIRTUAL BOX GUEST DEVICE DRIVER VIRTUAL BOX GUEST DEVICE DRIVER
M: Hans de Goede <hdegoede@redhat.com> M: Hans de Goede <hdegoede@redhat.com>
M: Arnd Bergmann <arnd@arndb.de> M: Arnd Bergmann <arnd@arndb.de>
......
...@@ -473,4 +473,15 @@ config HYPERV_IOMMU ...@@ -473,4 +473,15 @@ config HYPERV_IOMMU
Stub IOMMU driver to handle IRQs as to allow Hyper-V Linux Stub IOMMU driver to handle IRQs as to allow Hyper-V Linux
guests to run with x2APIC mode enabled. guests to run with x2APIC mode enabled.
config VIRTIO_IOMMU
bool "Virtio IOMMU driver"
depends on VIRTIO=y
depends on ARM64
select IOMMU_API
select INTERVAL_TREE
help
Para-virtualised IOMMU driver with virtio.
Say Y here if you intend to run this kernel as a guest.
endif # IOMMU_SUPPORT endif # IOMMU_SUPPORT
...@@ -33,3 +33,4 @@ obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o ...@@ -33,3 +33,4 @@ obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
obj-$(CONFIG_S390_IOMMU) += s390-iommu.o obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
obj-$(CONFIG_QCOM_IOMMU) += qcom_iommu.o obj-$(CONFIG_QCOM_IOMMU) += qcom_iommu.o
obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o
obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o
此差异已折叠。
...@@ -2294,8 +2294,12 @@ int of_map_rid(struct device_node *np, u32 rid, ...@@ -2294,8 +2294,12 @@ int of_map_rid(struct device_node *np, u32 rid,
return 0; return 0;
} }
pr_err("%pOF: Invalid %s translation - no match for rid 0x%x on %pOF\n", pr_info("%pOF: no %s translation for rid 0x%x on %pOF\n", np, map_name,
np, map_name, rid, target && *target ? *target : NULL); rid, target && *target ? *target : NULL);
return -EFAULT;
/* Bypasses translation */
if (id_out)
*id_out = rid;
return 0;
} }
EXPORT_SYMBOL_GPL(of_map_rid); EXPORT_SYMBOL_GPL(of_map_rid);
...@@ -22,12 +22,15 @@ void pci_set_of_node(struct pci_dev *dev) ...@@ -22,12 +22,15 @@ void pci_set_of_node(struct pci_dev *dev)
return; return;
dev->dev.of_node = of_pci_find_child_device(dev->bus->dev.of_node, dev->dev.of_node = of_pci_find_child_device(dev->bus->dev.of_node,
dev->devfn); dev->devfn);
if (dev->dev.of_node)
dev->dev.fwnode = &dev->dev.of_node->fwnode;
} }
void pci_release_of_node(struct pci_dev *dev) void pci_release_of_node(struct pci_dev *dev)
{ {
of_node_put(dev->dev.of_node); of_node_put(dev->dev.of_node);
dev->dev.of_node = NULL; dev->dev.of_node = NULL;
dev->dev.fwnode = NULL;
} }
void pci_set_bus_of_node(struct pci_bus *bus) void pci_set_bus_of_node(struct pci_bus *bus)
...@@ -41,13 +44,18 @@ void pci_set_bus_of_node(struct pci_bus *bus) ...@@ -41,13 +44,18 @@ void pci_set_bus_of_node(struct pci_bus *bus)
if (node && of_property_read_bool(node, "external-facing")) if (node && of_property_read_bool(node, "external-facing"))
bus->self->untrusted = true; bus->self->untrusted = true;
} }
bus->dev.of_node = node; bus->dev.of_node = node;
if (bus->dev.of_node)
bus->dev.fwnode = &bus->dev.of_node->fwnode;
} }
void pci_release_bus_of_node(struct pci_bus *bus) void pci_release_bus_of_node(struct pci_bus *bus)
{ {
of_node_put(bus->dev.of_node); of_node_put(bus->dev.of_node);
bus->dev.of_node = NULL; bus->dev.of_node = NULL;
bus->dev.fwnode = NULL;
} }
struct device_node * __weak pcibios_get_phb_of_node(struct pci_bus *bus) struct device_node * __weak pcibios_get_phb_of_node(struct pci_bus *bus)
......
...@@ -792,7 +792,7 @@ static int virtscsi_probe(struct virtio_device *vdev) ...@@ -792,7 +792,7 @@ static int virtscsi_probe(struct virtio_device *vdev)
num_targets = virtscsi_config_get(vdev, max_target) + 1; num_targets = virtscsi_config_get(vdev, max_target) + 1;
shost = scsi_host_alloc(&virtscsi_host_template, shost = scsi_host_alloc(&virtscsi_host_template,
sizeof(*vscsi) + sizeof(vscsi->req_vqs[0]) * num_queues); struct_size(vscsi, req_vqs, num_queues));
if (!shost) if (!shost)
return -ENOMEM; return -ENOMEM;
......
...@@ -956,7 +956,7 @@ static void handle_tx(struct vhost_net *net) ...@@ -956,7 +956,7 @@ static void handle_tx(struct vhost_net *net)
if (!sock) if (!sock)
goto out; goto out;
if (!vq_iotlb_prefetch(vq)) if (!vq_meta_prefetch(vq))
goto out; goto out;
vhost_disable_notify(&net->dev, vq); vhost_disable_notify(&net->dev, vq);
...@@ -1125,7 +1125,7 @@ static void handle_rx(struct vhost_net *net) ...@@ -1125,7 +1125,7 @@ static void handle_rx(struct vhost_net *net)
if (!sock) if (!sock)
goto out; goto out;
if (!vq_iotlb_prefetch(vq)) if (!vq_meta_prefetch(vq))
goto out; goto out;
vhost_disable_notify(&net->dev, vq); vhost_disable_notify(&net->dev, vq);
......
此差异已折叠。
...@@ -12,6 +12,9 @@ ...@@ -12,6 +12,9 @@
#include <linux/virtio_config.h> #include <linux/virtio_config.h>
#include <linux/virtio_ring.h> #include <linux/virtio_ring.h>
#include <linux/atomic.h> #include <linux/atomic.h>
#include <linux/pagemap.h>
#include <linux/mmu_notifier.h>
#include <asm/cacheflush.h>
struct vhost_work; struct vhost_work;
typedef void (*vhost_work_fn_t)(struct vhost_work *work); typedef void (*vhost_work_fn_t)(struct vhost_work *work);
...@@ -80,6 +83,24 @@ enum vhost_uaddr_type { ...@@ -80,6 +83,24 @@ enum vhost_uaddr_type {
VHOST_NUM_ADDRS = 3, VHOST_NUM_ADDRS = 3,
}; };
struct vhost_map {
int npages;
void *addr;
struct page **pages;
};
struct vhost_uaddr {
unsigned long uaddr;
size_t size;
bool write;
};
#if defined(CONFIG_MMU_NOTIFIER) && ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 0
#define VHOST_ARCH_CAN_ACCEL_UACCESS 1
#else
#define VHOST_ARCH_CAN_ACCEL_UACCESS 0
#endif
/* The virtqueue structure describes a queue attached to a device. */ /* The virtqueue structure describes a queue attached to a device. */
struct vhost_virtqueue { struct vhost_virtqueue {
struct vhost_dev *dev; struct vhost_dev *dev;
...@@ -90,7 +111,22 @@ struct vhost_virtqueue { ...@@ -90,7 +111,22 @@ struct vhost_virtqueue {
struct vring_desc __user *desc; struct vring_desc __user *desc;
struct vring_avail __user *avail; struct vring_avail __user *avail;
struct vring_used __user *used; struct vring_used __user *used;
#if VHOST_ARCH_CAN_ACCEL_UACCESS
/* Read by memory accessors, modified by meta data
* prefetching, MMU notifier and vring ioctl().
* Synchronized through mmu_lock (writers) and RCU (writers
* and readers).
*/
struct vhost_map __rcu *maps[VHOST_NUM_ADDRS];
/* Read by MMU notifier, modified by vring ioctl(),
* synchronized through MMU notifier
* registering/unregistering.
*/
struct vhost_uaddr uaddrs[VHOST_NUM_ADDRS];
#endif
const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS]; const struct vhost_umem_node *meta_iotlb[VHOST_NUM_ADDRS];
struct file *kick; struct file *kick;
struct eventfd_ctx *call_ctx; struct eventfd_ctx *call_ctx;
struct eventfd_ctx *error_ctx; struct eventfd_ctx *error_ctx;
...@@ -145,6 +181,8 @@ struct vhost_virtqueue { ...@@ -145,6 +181,8 @@ struct vhost_virtqueue {
bool user_be; bool user_be;
#endif #endif
u32 busyloop_timeout; u32 busyloop_timeout;
spinlock_t mmu_lock;
int invalidate_count;
}; };
struct vhost_msg_node { struct vhost_msg_node {
...@@ -158,6 +196,9 @@ struct vhost_msg_node { ...@@ -158,6 +196,9 @@ struct vhost_msg_node {
struct vhost_dev { struct vhost_dev {
struct mm_struct *mm; struct mm_struct *mm;
#ifdef CONFIG_MMU_NOTIFIER
struct mmu_notifier mmu_notifier;
#endif
struct mutex mutex; struct mutex mutex;
struct vhost_virtqueue **vqs; struct vhost_virtqueue **vqs;
int nvqs; int nvqs;
...@@ -212,7 +253,7 @@ bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *); ...@@ -212,7 +253,7 @@ bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *);
int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
unsigned int log_num, u64 len, unsigned int log_num, u64 len,
struct iovec *iov, int count); struct iovec *iov, int count);
int vq_iotlb_prefetch(struct vhost_virtqueue *vq); int vq_meta_prefetch(struct vhost_virtqueue *vq);
struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type); struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type);
void vhost_enqueue_msg(struct vhost_dev *dev, void vhost_enqueue_msg(struct vhost_dev *dev,
......
...@@ -463,9 +463,14 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs, ...@@ -463,9 +463,14 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct irq_affinity *desc) struct irq_affinity *desc)
{ {
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev); struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
unsigned int irq = platform_get_irq(vm_dev->pdev, 0); int irq = platform_get_irq(vm_dev->pdev, 0);
int i, err, queue_idx = 0; int i, err, queue_idx = 0;
if (irq < 0) {
dev_err(&vdev->dev, "Cannot get IRQ resource\n");
return irq;
}
err = request_irq(irq, vm_interrupt, IRQF_SHARED, err = request_irq(irq, vm_interrupt, IRQF_SHARED,
dev_name(&vdev->dev), vm_dev); dev_name(&vdev->dev), vm_dev);
if (err) if (err)
......
...@@ -43,5 +43,6 @@ ...@@ -43,5 +43,6 @@
#define VIRTIO_ID_INPUT 18 /* virtio input */ #define VIRTIO_ID_INPUT 18 /* virtio input */
#define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ #define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */
#define VIRTIO_ID_CRYPTO 20 /* virtio crypto */ #define VIRTIO_ID_CRYPTO 20 /* virtio crypto */
#define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */
#endif /* _LINUX_VIRTIO_IDS_H */ #endif /* _LINUX_VIRTIO_IDS_H */
/* SPDX-License-Identifier: BSD-3-Clause */
/*
 * Virtio-iommu definition v0.9
 *
 * UAPI header shared between the guest driver and the virtio-iommu
 * device. All multi-byte request/config fields declared __le* are
 * little-endian on the wire (virtio 1.0 byte order); __u8 fields and
 * reserved padding are opaque bytes. Layouts here are ABI — do not
 * reorder or resize fields.
 *
 * Copyright (C) 2018 Arm Ltd.
 */
#ifndef _UAPI_LINUX_VIRTIO_IOMMU_H
#define _UAPI_LINUX_VIRTIO_IOMMU_H
#include <linux/types.h>
/* Feature bits */
#define VIRTIO_IOMMU_F_INPUT_RANGE 0
#define VIRTIO_IOMMU_F_DOMAIN_BITS 1
#define VIRTIO_IOMMU_F_MAP_UNMAP 2
#define VIRTIO_IOMMU_F_BYPASS 3
#define VIRTIO_IOMMU_F_PROBE 4
/* Inclusive [start, end] address range. */
struct virtio_iommu_range {
__u64 start;
__u64 end;
};
/* Device configuration space layout (read by the driver at probe). */
struct virtio_iommu_config {
/* Supported page sizes */
__u64 page_size_mask;
/* Supported IOVA range */
struct virtio_iommu_range input_range;
/* Max domain ID size */
__u8 domain_bits;
__u8 padding[3];
/* Probe buffer size */
__u32 probe_size;
};
/* Request types */
#define VIRTIO_IOMMU_T_ATTACH 0x01
#define VIRTIO_IOMMU_T_DETACH 0x02
#define VIRTIO_IOMMU_T_MAP 0x03
#define VIRTIO_IOMMU_T_UNMAP 0x04
#define VIRTIO_IOMMU_T_PROBE 0x05
/* Status types */
#define VIRTIO_IOMMU_S_OK 0x00
#define VIRTIO_IOMMU_S_IOERR 0x01
#define VIRTIO_IOMMU_S_UNSUPP 0x02
#define VIRTIO_IOMMU_S_DEVERR 0x03
#define VIRTIO_IOMMU_S_INVAL 0x04
#define VIRTIO_IOMMU_S_RANGE 0x05
#define VIRTIO_IOMMU_S_NOENT 0x06
#define VIRTIO_IOMMU_S_FAULT 0x07
/* Common prefix of every request: one VIRTIO_IOMMU_T_* type byte. */
struct virtio_iommu_req_head {
__u8 type;
__u8 reserved[3];
};
/* Common suffix of every request: a VIRTIO_IOMMU_S_* status byte
 * written back by the device.
 */
struct virtio_iommu_req_tail {
__u8 status;
__u8 reserved[3];
};
/* ATTACH: bind an endpoint to an address-space domain. */
struct virtio_iommu_req_attach {
struct virtio_iommu_req_head head;
__le32 domain;
__le32 endpoint;
__u8 reserved[8];
struct virtio_iommu_req_tail tail;
};
/* DETACH: unbind an endpoint from its domain. Same layout as attach. */
struct virtio_iommu_req_detach {
struct virtio_iommu_req_head head;
__le32 domain;
__le32 endpoint;
__u8 reserved[8];
struct virtio_iommu_req_tail tail;
};
/* Mapping permission/attribute flags for virtio_iommu_req_map.flags. */
#define VIRTIO_IOMMU_MAP_F_READ (1 << 0)
#define VIRTIO_IOMMU_MAP_F_WRITE (1 << 1)
#define VIRTIO_IOMMU_MAP_F_EXEC (1 << 2)
#define VIRTIO_IOMMU_MAP_F_MMIO (1 << 3)
#define VIRTIO_IOMMU_MAP_F_MASK (VIRTIO_IOMMU_MAP_F_READ | \
VIRTIO_IOMMU_MAP_F_WRITE | \
VIRTIO_IOMMU_MAP_F_EXEC | \
VIRTIO_IOMMU_MAP_F_MMIO)
/* MAP: create an IOVA->physical mapping [virt_start, virt_end] in a
 * domain, starting at phys_start.
 */
struct virtio_iommu_req_map {
struct virtio_iommu_req_head head;
__le32 domain;
__le64 virt_start;
__le64 virt_end;
__le64 phys_start;
__le32 flags;
struct virtio_iommu_req_tail tail;
};
/* UNMAP: remove mappings covering [virt_start, virt_end] in a domain. */
struct virtio_iommu_req_unmap {
struct virtio_iommu_req_head head;
__le32 domain;
__le64 virt_start;
__le64 virt_end;
__u8 reserved[4];
struct virtio_iommu_req_tail tail;
};
/* Probe property types (virtio_iommu_probe_property.type). */
#define VIRTIO_IOMMU_PROBE_T_NONE 0
#define VIRTIO_IOMMU_PROBE_T_RESV_MEM 1
#define VIRTIO_IOMMU_PROBE_T_MASK 0xfff
/* Type/length header of one property in the probe output buffer. */
struct virtio_iommu_probe_property {
__le16 type;
__le16 length;
};
/* Subtypes of a RESV_MEM property. */
#define VIRTIO_IOMMU_RESV_MEM_T_RESERVED 0
#define VIRTIO_IOMMU_RESV_MEM_T_MSI 1
/* RESV_MEM property: a [start, end] region the driver must not map. */
struct virtio_iommu_probe_resv_mem {
struct virtio_iommu_probe_property head;
__u8 subtype;
__u8 reserved[3];
__le64 start;
__le64 end;
};
/* PROBE: ask the device to describe an endpoint; the device fills
 * 'properties' with a sequence of virtio_iommu_probe_property entries.
 */
struct virtio_iommu_req_probe {
struct virtio_iommu_req_head head;
__le32 endpoint;
__u8 reserved[64];
__u8 properties[];
/*
 * Tail follows the variable-length properties array. No padding,
 * property lengths are all aligned on 8 bytes.
 */
};
/* Fault types */
#define VIRTIO_IOMMU_FAULT_R_UNKNOWN 0
#define VIRTIO_IOMMU_FAULT_R_DOMAIN 1
#define VIRTIO_IOMMU_FAULT_R_MAPPING 2
/* Fault flags; F_ADDRESS indicates the 'address' field is valid. */
#define VIRTIO_IOMMU_FAULT_F_READ (1 << 0)
#define VIRTIO_IOMMU_FAULT_F_WRITE (1 << 1)
#define VIRTIO_IOMMU_FAULT_F_EXEC (1 << 2)
#define VIRTIO_IOMMU_FAULT_F_ADDRESS (1 << 8)
/* Fault event reported on the event virtqueue. */
struct virtio_iommu_fault {
__u8 reason;
__u8 reserved[3];
__le32 flags;
__le32 endpoint;
__u8 reserved2[4];
__le64 address;
};
#endif
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册