diff --git a/src/qemu/qemu_domain_address.c b/src/qemu/qemu_domain_address.c index b247c851a3b1835d438dd6aa5f196c4bce23e764..d943c8bcaa3a9e7af6242d89c3019bb816b9cce0 100644 --- a/src/qemu/qemu_domain_address.c +++ b/src/qemu/qemu_domain_address.c @@ -25,6 +25,7 @@ #include "qemu_domain_address.h" #include "qemu_domain.h" +#include "network/bridge_driver.h" #include "viralloc.h" #include "virerror.h" #include "virlog.h" @@ -905,6 +906,243 @@ qemuDomainFillAllPCIConnectFlags(virDomainDefPtr def, } +/** + * qemuDomainFindUnusedIsolationGroupIter: + * @def: domain definition + * @dev: device definition + * @info: device information + * @opaque: user data + * + * Used to implement qemuDomainFindUnusedIsolationGroup(). You probably + * don't want to call this directly. + * + * Return: 0 if the isolation group is not used by the device, <0 otherwise. + */ +static int +qemuDomainFindUnusedIsolationGroupIter(virDomainDefPtr def ATTRIBUTE_UNUSED, + virDomainDeviceDefPtr dev ATTRIBUTE_UNUSED, + virDomainDeviceInfoPtr info, + void *opaque) +{ + unsigned int *isolationGroup = opaque; + + if (info->isolationGroup == *isolationGroup) + return -1; + + return 0; +} + + +/** + * qemuDomainFindUnusedIsolationGroup: + * @def: domain definition + * + * Find an isolation group that is not used by any device in @def yet. + * + * Normally, we'd look up the device's IOMMU group and base its isolation + * group on that; however, when a network interface uses a network backed + * by SR-IOV Virtual Functions, we can't know at PCI address assignment + * time which host device will be used so we can't look up its IOMMU group. + * + * We still want such a device to be isolated: this function can be used + * to obtain a synthetic isolation group usable for the purpose. 
+ * + * Return: unused isolation group, or 0 if none could be found + */ +static unsigned int +qemuDomainFindUnusedIsolationGroup(virDomainDefPtr def) +{ + unsigned int isolationGroup = UINT_MAX; + + /* We start from the highest possible isolation group and work our + * way backwards so that we're working in a completely different range + * from IOMMU groups, thus avoiding clashes. We're realistically going + * to call this function just a few times per guest anyway */ + while (isolationGroup > 0 && + virDomainDeviceInfoIterate(def, + qemuDomainFindUnusedIsolationGroupIter, + &isolationGroup) < 0) { + isolationGroup--; + } + + return isolationGroup; +} + + +/** + * qemuDomainFillDeviceIsolationGroup: + * @def: domain definition + * @dev: device definition + * + * Fill isolation group information for a single device. + * + * Return: 0 on success, <0 on failure + * */ +int +qemuDomainFillDeviceIsolationGroup(virDomainDefPtr def, + virDomainDeviceDefPtr dev) +{ + int ret = -1; + + /* Only host devices need their isolation group to be different from + * the default. 
Interfaces of type hostdev are just host devices in + * disguise, but we don't need to handle them separately because for + * each such interface a corresponding hostdev is also added to the + * guest configuration */ + if (dev->type == VIR_DOMAIN_DEVICE_HOSTDEV) { + virDomainHostdevDefPtr hostdev = dev->data.hostdev; + virDomainDeviceInfoPtr info = hostdev->info; + virPCIDeviceAddressPtr hostAddr; + int tmp; + + /* Only PCI host devices are subject to isolation */ + if (hostdev->mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS || + hostdev->source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) { + goto skip; + } + + hostAddr = &hostdev->source.subsys.u.pci.addr; + + /* If a non-default isolation group has already been assigned to the + * device, we can avoid looking up the information again */ + if (info->isolationGroup > 0) + goto skip; + + /* The isolation group depends on the IOMMU group assigned by the host */ + tmp = virPCIDeviceAddressGetIOMMUGroupNum(hostAddr); + + if (tmp < 0) { + VIR_WARN("Can't look up isolation group for host device " + "%04x:%02x:%02x.%x", + hostAddr->domain, hostAddr->bus, + hostAddr->slot, hostAddr->function); + goto cleanup; + } + + /* The isolation group for a host device is its IOMMU group, + * increased by one: this is because zero is a valid IOMMU group but + * that's also the default isolation group, which we want to save + * for emulated devices. 
Shifting isolation groups for host devices + * by one ensures there is no overlap */ + info->isolationGroup = tmp + 1; + + VIR_DEBUG("Isolation group for host device %04x:%02x:%02x.%x is %u", + hostAddr->domain, hostAddr->bus, + hostAddr->slot, hostAddr->function, + info->isolationGroup); + + } else if (dev->type == VIR_DOMAIN_DEVICE_NET) { + virDomainNetDefPtr iface = dev->data.net; + virDomainDeviceInfoPtr info = &iface->info; + unsigned int tmp; + + /* Network interfaces can ultimately result in the guest being + * assigned a host device if the libvirt network they're connected + * to is of type hostdev. All other kinds of network interfaces don't + * require us to isolate the guest device, so we can skip them */ + if (iface->type != VIR_DOMAIN_NET_TYPE_NETWORK || + networkGetActualType(iface) != VIR_DOMAIN_NET_TYPE_HOSTDEV) { + goto skip; + } + + /* If a non-default isolation group has already been assigned to the + * device, we can avoid looking up the information again */ + if (info->isolationGroup > 0) + goto skip; + + /* Obtain a synthetic isolation group for the device, since at this + * point in time we don't have access to the IOMMU group of the host + * device that will eventually be used by the guest */ + tmp = qemuDomainFindUnusedIsolationGroup(def); + + if (tmp == 0) { + VIR_WARN("Can't obtain usable isolation group for interface " + "configured to use hostdev-backed network '%s'", + iface->data.network.name); + goto cleanup; + } + + info->isolationGroup = tmp; + + VIR_DEBUG("Isolation group for interface configured to use " + "hostdev-backed network '%s' is %u", + iface->data.network.name, info->isolationGroup); + } + + skip: + ret = 0; + + cleanup: + return ret; +} + + +/** + * qemuDomainFillDeviceIsolationGroupIter: + * @def: domain definition + * @dev: device definition + * @info: device information + * @opaque: user data + * + * A version of qemuDomainFillDeviceIsolationGroup() to be used + * with virDomainDeviceInfoIterate() + * + * Return: 0 on 
success, <0 on failure + */ +static int +qemuDomainFillDeviceIsolationGroupIter(virDomainDefPtr def, + virDomainDeviceDefPtr dev, + virDomainDeviceInfoPtr info ATTRIBUTE_UNUSED, + void *opaque ATTRIBUTE_UNUSED) +{ + return qemuDomainFillDeviceIsolationGroup(def, dev); +} + + +/** + * qemuDomainSetupIsolationGroups: + * @def: domain definition + * + * High-level function to set up isolation groups for all devices + * and controllers in @def. Isolation groups will only be set up if + * the guest architecture and machine type require it, so this + * function can and should be called unconditionally before attempting + * to assign any PCI address. + * + * Return: 0 on success, <0 on failure + */ +static int +qemuDomainSetupIsolationGroups(virDomainDefPtr def) +{ + int idx; + int ret = -1; + + /* Only pSeries guests care about isolation groups at the moment */ + if (!qemuDomainIsPSeries(def)) + return 0; + + idx = virDomainControllerFind(def, VIR_DOMAIN_CONTROLLER_TYPE_PCI, 0); + if (idx < 0) + goto cleanup; + + /* We want to prevent hostdevs from being plugged into the default PHB: + * we can make sure that doesn't happen by locking its isolation group */ + def->controllers[idx]->info.isolationGroupLocked = true; + + /* Fill in isolation groups for all other devices */ + if (virDomainDeviceInfoIterate(def, + qemuDomainFillDeviceIsolationGroupIter, + NULL) < 0) { + goto cleanup; + } + + ret = 0; + + cleanup: + return ret; +} + + /** * qemuDomainFillDevicePCIConnectFlags: * @@ -2054,6 +2292,9 @@ qemuDomainAssignPCIAddresses(virDomainDefPtr def, if (qemuDomainFillAllPCIConnectFlags(def, qemuCaps, driver) < 0) goto cleanup; + if (qemuDomainSetupIsolationGroups(def) < 0) + goto cleanup; + if (nbuses > 0) { /* 1st pass to figure out how many PCI bridges we need */ if (!(addrs = qemuDomainPCIAddressSetCreate(def, nbuses, true))) diff --git a/src/qemu/qemu_domain_address.h b/src/qemu/qemu_domain_address.h index 
067f4e799703e9a06eed9bce73c526ebf0b8f736..b5644fa9c240d8f93511947a7012cde16f1bb541 100644 --- a/src/qemu/qemu_domain_address.h +++ b/src/qemu/qemu_domain_address.h @@ -44,6 +44,10 @@ int qemuDomainEnsurePCIAddress(virDomainObjPtr obj, virQEMUDriverPtr driver) ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2) ATTRIBUTE_NONNULL(3); +int qemuDomainFillDeviceIsolationGroup(virDomainDefPtr def, + virDomainDeviceDefPtr dev) + ATTRIBUTE_NONNULL(1) ATTRIBUTE_NONNULL(2); + void qemuDomainReleaseDeviceAddress(virDomainObjPtr vm, virDomainDeviceInfoPtr info, const char *devstr); diff --git a/src/qemu/qemu_hotplug.c b/src/qemu/qemu_hotplug.c index 4bc49720b552762c7f67e3c28148b884c8487e77..da5aafaab346f5114c16ef07e80fba56941d2e50 100644 --- a/src/qemu/qemu_hotplug.c +++ b/src/qemu/qemu_hotplug.c @@ -1476,6 +1476,13 @@ qemuDomainAttachHostPCIDevice(virQEMUDriverPtr driver, if (qemuAssignDeviceHostdevAlias(vm->def, &info->alias, -1) < 0) goto error; + + if (qemuDomainIsPSeries(vm->def)) { + /* Isolation groups are only relevant for pSeries guests */ + if (qemuDomainFillDeviceIsolationGroup(vm->def, &dev) < 0) + goto error; + } + if (qemuDomainEnsurePCIAddress(vm, &dev, driver) < 0) goto error; releaseaddr = true; diff --git a/tests/qemumemlocktest.c b/tests/qemumemlocktest.c index c0f1dc31934dd94231f6fe4039255461e155b508..268563df4f567918922baec2c7fcab81668bfc78 100644 --- a/tests/qemumemlocktest.c +++ b/tests/qemumemlocktest.c @@ -131,7 +131,7 @@ mymain(void) DO_TEST("pseries-hardlimit", 2147483648); DO_TEST("pseries-locked", VIR_DOMAIN_MEMORY_PARAM_UNLIMITED); - DO_TEST("pseries-hostdev", 2168455168); + DO_TEST("pseries-hostdev", 4320133120); DO_TEST("pseries-hardlimit+locked", 2147483648); DO_TEST("pseries-hardlimit+hostdev", 2147483648); diff --git a/tests/qemuxml2argvdata/qemuxml2argv-pseries-hostdevs-1.args b/tests/qemuxml2argvdata/qemuxml2argv-pseries-hostdevs-1.args index 051ffdeb3ea5a06dadddfbacf219696877c945d8..8a4a4c5a63fb6b56afcf48f6a032af4fa98a5d54 100644 --- 
a/tests/qemuxml2argvdata/qemuxml2argv-pseries-hostdevs-1.args +++ b/tests/qemuxml2argvdata/qemuxml2argv-pseries-hostdevs-1.args @@ -18,6 +18,8 @@ QEMU_AUDIO_DRV=none \ server,nowait \ -mon chardev=charmonitor,id=monitor,mode=readline \ -boot c \ --device vfio-pci,host=0005:90:01.0,id=hostdev0,bus=pci.0,addr=0x1 \ --device vfio-pci,host=0001:01:00.0,id=hostdev1,bus=pci.0,addr=0x2 \ --device vfio-pci,host=0001:01:00.1,id=hostdev2,bus=pci.0,addr=0x3 +-device spapr-pci-host-bridge,index=1,id=pci.1 \ +-device spapr-pci-host-bridge,index=2,id=pci.2 \ +-device vfio-pci,host=0005:90:01.0,id=hostdev0,bus=pci.1.0,addr=0x1 \ +-device vfio-pci,host=0001:01:00.0,id=hostdev1,bus=pci.2.0,addr=0x1 \ +-device vfio-pci,host=0001:01:00.1,id=hostdev2,bus=pci.2.0,addr=0x2 diff --git a/tests/qemuxml2argvdata/qemuxml2argv-pseries-hostdevs-2.args b/tests/qemuxml2argvdata/qemuxml2argv-pseries-hostdevs-2.args index 83d4306036dc18e2f38ac74b2ec5faaf56c39d3d..cd5b66404ea15f486edb2ca7d00635c03f2b67a0 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-pseries-hostdevs-2.args +++ b/tests/qemuxml2argvdata/qemuxml2argv-pseries-hostdevs-2.args @@ -19,6 +19,7 @@ server,nowait \ -mon chardev=charmonitor,id=monitor,mode=readline \ -boot c \ -device spapr-pci-host-bridge,index=1,id=pci.1 \ +-device spapr-pci-host-bridge,index=2,id=pci.2 \ -device virtio-scsi-pci,id=scsi0,bus=pci.1.0,addr=0x1 \ -device vfio-pci,host=0001:01:00.0,id=hostdev0,bus=pci.1.0,addr=0x2 \ --device vfio-pci,host=0005:90:01.0,id=hostdev1,bus=pci.0,addr=0x1 +-device vfio-pci,host=0005:90:01.0,id=hostdev1,bus=pci.2.0,addr=0x1 diff --git a/tests/qemuxml2argvdata/qemuxml2argv-pseries-hostdevs-3.args b/tests/qemuxml2argvdata/qemuxml2argv-pseries-hostdevs-3.args index eda6cc73ace797e6518c73b3d40a39649bbb6e08..66a31ba1a86a928acd450717bf49ea6c721e398d 100644 --- a/tests/qemuxml2argvdata/qemuxml2argv-pseries-hostdevs-3.args +++ b/tests/qemuxml2argvdata/qemuxml2argv-pseries-hostdevs-3.args @@ -21,4 +21,4 @@ server,nowait \ -device 
spapr-pci-host-bridge,index=1,id=pci.1 \ -device spapr-pci-host-bridge,index=2,id=pci.2 \ -device vfio-pci,host=0001:01:00.0,id=hostdev0,bus=pci.2.0,addr=0x1 \ --device vfio-pci,host=0001:01:00.1,id=hostdev1,bus=pci.0,addr=0x1 +-device vfio-pci,host=0001:01:00.1,id=hostdev1,bus=pci.2.0,addr=0x2 diff --git a/tests/qemuxml2xmloutdata/qemuxml2xmlout-pseries-hostdevs-1.xml b/tests/qemuxml2xmloutdata/qemuxml2xmlout-pseries-hostdevs-1.xml index fa9e4daca536180f3189b305bb5993a61f3dcd68..e77a060a38a7e51a1b91c054d16706b6d85144d0 100644 --- a/tests/qemuxml2xmloutdata/qemuxml2xmlout-pseries-hostdevs-1.xml +++ b/tests/qemuxml2xmloutdata/qemuxml2xmlout-pseries-hostdevs-1.xml @@ -19,27 +19,35 @@ + + + + + + + +
-
+
-
+
-
+
diff --git a/tests/qemuxml2xmloutdata/qemuxml2xmlout-pseries-hostdevs-2.xml b/tests/qemuxml2xmloutdata/qemuxml2xmlout-pseries-hostdevs-2.xml index 17ff4c85378d37e8f8453b4c76b4f5bdc6724afc..cfa395b001f02e0224c52cf8b4c62734b1329526 100644 --- a/tests/qemuxml2xmloutdata/qemuxml2xmlout-pseries-hostdevs-2.xml +++ b/tests/qemuxml2xmloutdata/qemuxml2xmlout-pseries-hostdevs-2.xml @@ -26,6 +26,10 @@ + + + + @@ -38,7 +42,7 @@
-
+
diff --git a/tests/qemuxml2xmloutdata/qemuxml2xmlout-pseries-hostdevs-3.xml b/tests/qemuxml2xmloutdata/qemuxml2xmlout-pseries-hostdevs-3.xml index 58023ecd722a4b95bfd522eeb063c367978af1ed..f91959b805892d17a741fccb7fd6465d3dad121f 100644 --- a/tests/qemuxml2xmloutdata/qemuxml2xmlout-pseries-hostdevs-3.xml +++ b/tests/qemuxml2xmloutdata/qemuxml2xmlout-pseries-hostdevs-3.xml @@ -39,7 +39,7 @@
-
+