提交 4ef33685 编写于 作者: C Christoph Hellwig 提交者: Bjorn Helgaas

PCI: Spread interrupt vectors in pci_alloc_irq_vectors()

Set the affinity_mask in the PCI device before allocating vectors so that
the affinity can be propagated through the MSI descriptor structures to the
core IRQ code.  To facilitate this, new __pci_enable_msi_range() and
__pci_enable_msix_range() helpers are factored out of their unprefixed
variants.  These helpers assign the new IRQ affinity mask in the PCI device
so that the low-level interrupt code can perform the interrupt affinity
assignment and do node-local allocations.

A new PCI_IRQ_NOAFFINITY flag is added to pci_alloc_irq_vectors() so that
this function can also be used by drivers that don't wish to use the
automatic affinity assignment.

[bhelgaas: omit "else" after "return" consistently]
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Alexander Gordeev <agordeev@redhat.com>
上级 aff17164
...@@ -99,6 +99,10 @@ PCI_IRQ_NOMSI and PCI_IRQ_NOMSIX flag in case a device claims to support ...@@ -99,6 +99,10 @@ PCI_IRQ_NOMSI and PCI_IRQ_NOMSIX flag in case a device claims to support
MSI or MSI-X, but the support is broken, or to pass PCI_IRQ_NOLEGACY in MSI or MSI-X, but the support is broken, or to pass PCI_IRQ_NOLEGACY in
case the device does not support legacy interrupt lines. case the device does not support legacy interrupt lines.
By default this function will spread the interrupts around the available
CPUs, but this feature can be disabled by passing the PCI_IRQ_NOAFFINITY
flag.
To get the Linux IRQ numbers passed to request_irq() and free_irq() and the To get the Linux IRQ numbers passed to request_irq() and free_irq() and the
vectors, use the following function: vectors, use the following function:
......
...@@ -569,6 +569,7 @@ static struct msi_desc *msi_setup_entry(struct pci_dev *dev, int nvec) ...@@ -569,6 +569,7 @@ static struct msi_desc *msi_setup_entry(struct pci_dev *dev, int nvec)
entry->msi_attrib.multi_cap = (control & PCI_MSI_FLAGS_QMASK) >> 1; entry->msi_attrib.multi_cap = (control & PCI_MSI_FLAGS_QMASK) >> 1;
entry->msi_attrib.multiple = ilog2(__roundup_pow_of_two(nvec)); entry->msi_attrib.multiple = ilog2(__roundup_pow_of_two(nvec));
entry->nvec_used = nvec; entry->nvec_used = nvec;
entry->affinity = dev->irq_affinity;
if (control & PCI_MSI_FLAGS_64BIT) if (control & PCI_MSI_FLAGS_64BIT)
entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_64; entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_64;
...@@ -680,10 +681,18 @@ static void __iomem *msix_map_region(struct pci_dev *dev, unsigned nr_entries) ...@@ -680,10 +681,18 @@ static void __iomem *msix_map_region(struct pci_dev *dev, unsigned nr_entries)
static int msix_setup_entries(struct pci_dev *dev, void __iomem *base, static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
struct msix_entry *entries, int nvec) struct msix_entry *entries, int nvec)
{ {
const struct cpumask *mask = NULL;
struct msi_desc *entry; struct msi_desc *entry;
int i; int cpu = -1, i;
for (i = 0; i < nvec; i++) { for (i = 0; i < nvec; i++) {
if (dev->irq_affinity) {
cpu = cpumask_next(cpu, dev->irq_affinity);
if (cpu >= nr_cpu_ids)
cpu = cpumask_first(dev->irq_affinity);
mask = cpumask_of(cpu);
}
entry = alloc_msi_entry(&dev->dev); entry = alloc_msi_entry(&dev->dev);
if (!entry) { if (!entry) {
if (!i) if (!i)
...@@ -703,6 +712,7 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base, ...@@ -703,6 +712,7 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
entry->msi_attrib.default_irq = dev->irq; entry->msi_attrib.default_irq = dev->irq;
entry->mask_base = base; entry->mask_base = base;
entry->nvec_used = 1; entry->nvec_used = 1;
entry->affinity = mask;
list_add_tail(&entry->list, dev_to_msi_list(&dev->dev)); list_add_tail(&entry->list, dev_to_msi_list(&dev->dev));
} }
...@@ -1028,19 +1038,8 @@ int pci_msi_enabled(void) ...@@ -1028,19 +1038,8 @@ int pci_msi_enabled(void)
} }
EXPORT_SYMBOL(pci_msi_enabled); EXPORT_SYMBOL(pci_msi_enabled);
/** static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
* pci_enable_msi_range - configure device's MSI capability structure unsigned int flags)
* @dev: device to configure
* @minvec: minimal number of interrupts to configure
* @maxvec: maximum number of interrupts to configure
*
* This function tries to allocate a maximum possible number of interrupts in a
* range between @minvec and @maxvec. It returns a negative errno if an error
* occurs. If it succeeds, it returns the actual number of interrupts allocated
* and updates the @dev's irq member to the lowest new interrupt number;
* the other interrupt numbers allocated to this device are consecutive.
**/
int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec)
{ {
int nvec; int nvec;
int rc; int rc;
...@@ -1063,25 +1062,85 @@ int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec) ...@@ -1063,25 +1062,85 @@ int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec)
nvec = pci_msi_vec_count(dev); nvec = pci_msi_vec_count(dev);
if (nvec < 0) if (nvec < 0)
return nvec; return nvec;
else if (nvec < minvec) if (nvec < minvec)
return -EINVAL; return -EINVAL;
else if (nvec > maxvec)
if (nvec > maxvec)
nvec = maxvec; nvec = maxvec;
do { for (;;) {
if (!(flags & PCI_IRQ_NOAFFINITY)) {
dev->irq_affinity = irq_create_affinity_mask(&nvec);
if (nvec < minvec)
return -ENOSPC;
}
rc = msi_capability_init(dev, nvec); rc = msi_capability_init(dev, nvec);
if (rc < 0) { if (rc == 0)
return nvec;
kfree(dev->irq_affinity);
dev->irq_affinity = NULL;
if (rc < 0)
return rc; return rc;
} else if (rc > 0) { if (rc < minvec)
if (rc < minvec) return -ENOSPC;
nvec = rc;
}
}
/**
* pci_enable_msi_range - configure device's MSI capability structure
* @dev: device to configure
* @minvec: minimal number of interrupts to configure
* @maxvec: maximum number of interrupts to configure
*
* This function tries to allocate a maximum possible number of interrupts in a
* range between @minvec and @maxvec. It returns a negative errno if an error
* occurs. If it succeeds, it returns the actual number of interrupts allocated
* and updates the @dev's irq member to the lowest new interrupt number;
* the other interrupt numbers allocated to this device are consecutive.
**/
int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec)
{
return __pci_enable_msi_range(dev, minvec, maxvec, PCI_IRQ_NOAFFINITY);
}
EXPORT_SYMBOL(pci_enable_msi_range);
static int __pci_enable_msix_range(struct pci_dev *dev,
struct msix_entry *entries, int minvec, int maxvec,
unsigned int flags)
{
int nvec = maxvec;
int rc;
if (maxvec < minvec)
return -ERANGE;
for (;;) {
if (!(flags & PCI_IRQ_NOAFFINITY)) {
dev->irq_affinity = irq_create_affinity_mask(&nvec);
if (nvec < minvec)
return -ENOSPC; return -ENOSPC;
nvec = rc;
} }
} while (rc);
return nvec; rc = pci_enable_msix(dev, entries, nvec);
if (rc == 0)
return nvec;
kfree(dev->irq_affinity);
dev->irq_affinity = NULL;
if (rc < 0)
return rc;
if (rc < minvec)
return -ENOSPC;
nvec = rc;
}
} }
EXPORT_SYMBOL(pci_enable_msi_range);
/** /**
* pci_enable_msix_range - configure device's MSI-X capability structure * pci_enable_msix_range - configure device's MSI-X capability structure
...@@ -1099,26 +1158,10 @@ EXPORT_SYMBOL(pci_enable_msi_range); ...@@ -1099,26 +1158,10 @@ EXPORT_SYMBOL(pci_enable_msi_range);
* with new allocated MSI-X interrupts. * with new allocated MSI-X interrupts.
**/ **/
int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
int minvec, int maxvec) int minvec, int maxvec)
{ {
int nvec = maxvec; return __pci_enable_msix_range(dev, entries, minvec, maxvec,
int rc; PCI_IRQ_NOAFFINITY);
if (maxvec < minvec)
return -ERANGE;
do {
rc = pci_enable_msix(dev, entries, nvec);
if (rc < 0) {
return rc;
} else if (rc > 0) {
if (rc < minvec)
return -ENOSPC;
nvec = rc;
}
} while (rc);
return nvec;
} }
EXPORT_SYMBOL(pci_enable_msix_range); EXPORT_SYMBOL(pci_enable_msix_range);
...@@ -1145,13 +1188,14 @@ int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs, ...@@ -1145,13 +1188,14 @@ int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
int vecs = -ENOSPC; int vecs = -ENOSPC;
if (!(flags & PCI_IRQ_NOMSIX)) { if (!(flags & PCI_IRQ_NOMSIX)) {
vecs = pci_enable_msix_range(dev, NULL, min_vecs, max_vecs); vecs = __pci_enable_msix_range(dev, NULL, min_vecs, max_vecs,
flags);
if (vecs > 0) if (vecs > 0)
return vecs; return vecs;
} }
if (!(flags & PCI_IRQ_NOMSI)) { if (!(flags & PCI_IRQ_NOMSI)) {
vecs = pci_enable_msi_range(dev, min_vecs, max_vecs); vecs = __pci_enable_msi_range(dev, min_vecs, max_vecs, flags);
if (vecs > 0) if (vecs > 0)
return vecs; return vecs;
} }
......
...@@ -320,6 +320,7 @@ struct pci_dev { ...@@ -320,6 +320,7 @@ struct pci_dev {
* directly, use the values stored here. They might be different! * directly, use the values stored here. They might be different!
*/ */
unsigned int irq; unsigned int irq;
struct cpumask *irq_affinity;
struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */ struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */
bool match_driver; /* Skip attaching driver */ bool match_driver; /* Skip attaching driver */
...@@ -1240,6 +1241,7 @@ int pci_set_vga_state(struct pci_dev *pdev, bool decode, ...@@ -1240,6 +1241,7 @@ int pci_set_vga_state(struct pci_dev *pdev, bool decode,
#define PCI_IRQ_NOLEGACY (1 << 0) /* don't use legacy interrupts */ #define PCI_IRQ_NOLEGACY (1 << 0) /* don't use legacy interrupts */
#define PCI_IRQ_NOMSI (1 << 1) /* don't use MSI interrupts */ #define PCI_IRQ_NOMSI (1 << 1) /* don't use MSI interrupts */
#define PCI_IRQ_NOMSIX (1 << 2) /* don't use MSI-X interrupts */ #define PCI_IRQ_NOMSIX (1 << 2) /* don't use MSI-X interrupts */
#define PCI_IRQ_NOAFFINITY (1 << 3) /* don't auto-assign affinity */
/* kmem_cache style wrapper around pci_alloc_consistent() */ /* kmem_cache style wrapper around pci_alloc_consistent() */
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册