Commit 9f86262d authored by Linus Torvalds

Merge git://git.infradead.org/intel-iommu

Pull intel iommu updates from David Woodhouse:
 "This lays a little of the groundwork for upcoming Shared Virtual
  Memory support — fixing some bogus #defines for capability bits and
  adding the new ones, and starting to use the new wider page tables
  where we can, in anticipation of actually filling in the new fields
  therein.

  It also allows graphics devices to be assigned to VM guests again.
  This got broken in 3.17 by disallowing assignment of RMRR-afflicted
  devices.  Like USB, we do understand why there's an RMRR for graphics
  devices — and unlike USB, it's actually sane.  So we can make an
  exception for graphics devices, just as we do USB controllers.

  Finally, tone down the warning about the X2APIC_OPT_OUT bit, due to
  persistent requests.  X2APIC_OPT_OUT was added to the spec as a nasty
  hack to allow broken BIOSes to forbid us from using X2APIC when they
  do stupid and invasive things which would break if we did.

  Someone noticed that since Windows doesn't have full IOMMU support for
  DMA protection, setting the X2APIC_OPT_OUT bit made Windows avoid
  initialising the IOMMU on the graphics unit altogether.

  This means that it would be available for use in "driver mode", where
  the IOMMU registers are made available through a BAR of the graphics
  device and the graphics driver can do SVM all for itself.

  So they started setting the X2APIC_OPT_OUT bit on *all* platforms with
  SVM capabilities.  And even on the platforms which *might*, if the
  planets had been aligned correctly, possibly have had SVM capability
  but which in practice actually don't"

* git://git.infradead.org/intel-iommu:
  iommu/vt-d: support extended root and context entries
  iommu/vt-d: Add new extended capabilities from v2.3 VT-d specification
  iommu/vt-d: Allow RMRR on graphics devices too
  iommu/vt-d: Print x2apic opt out info instead of printing a warning
  iommu/vt-d: kill bogus ecap_niotlb_iunits()
--- a/drivers/iommu/intel-iommu.c
+++ b/drivers/iommu/intel-iommu.c
@@ -50,6 +50,7 @@
 #define CONTEXT_SIZE		VTD_PAGE_SIZE
 
 #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
+#define IS_USB_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
 #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
 #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
@@ -184,32 +185,11 @@ static int force_on = 0;
  * 64-127: Reserved
  */
 struct root_entry {
-	u64	val;
-	u64	rsvd1;
+	u64	lo;
+	u64	hi;
 };
 #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
-static inline bool root_present(struct root_entry *root)
-{
-	return (root->val & 1);
-}
-static inline void set_root_present(struct root_entry *root)
-{
-	root->val |= 1;
-}
-static inline void set_root_value(struct root_entry *root, unsigned long value)
-{
-	root->val &= ~VTD_PAGE_MASK;
-	root->val |= value & VTD_PAGE_MASK;
-}
-
-static inline struct context_entry *
-get_context_addr_from_root(struct root_entry *root)
-{
-	return (struct context_entry *)
-		(root_present(root)?phys_to_virt(
-		root->val & VTD_PAGE_MASK) :
-		NULL);
-}
 
 /*
  * low 64 bits:
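
With extended context support (ECS) enabled, the 128-bit root entry stops treating its upper half as reserved: lo carries the context-table pointer for devfn 0x00-0x7f and hi the one for devfn 0x80-0xff, each with a present flag in bit 0 and a 4KiB-aligned table address above it. Below is a minimal userspace sketch of that interpretation; the struct mirrors this patch, while VTD_PAGE_SHIFT, main() and the sample entry value are illustrative scaffolding, not kernel code.

    /* Illustrative only: decode one half of a (made-up) extended root entry. */
    #include <stdint.h>
    #include <stdio.h>

    #define VTD_PAGE_SHIFT 12
    #define VTD_PAGE_MASK  (((uint64_t)-1) << VTD_PAGE_SHIFT)

    struct root_entry {
        uint64_t lo;    /* present bit + context table for devfn 0x00-0x7f */
        uint64_t hi;    /* present bit + context table for devfn 0x80-0xff */
    };

    int main(void)
    {
        struct root_entry re = { .lo = 0x12345001ULL, .hi = 0 }; /* made up */

        if (re.lo & 1)  /* present bit set? */
            printf("lo context table at phys 0x%llx\n",
                   (unsigned long long)(re.lo & VTD_PAGE_MASK));
        if (!(re.hi & 1))
            printf("no context table for devfn >= 0x80 yet\n");
        return 0;
    }
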
@@ -682,6 +662,40 @@ static void domain_update_iommu_cap(struct dmar_domain *domain)
 	domain->iommu_superpage = domain_update_iommu_superpage(NULL);
 }
 
+static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu,
+						       u8 bus, u8 devfn, int alloc)
+{
+	struct root_entry *root = &iommu->root_entry[bus];
+	struct context_entry *context;
+	u64 *entry;
+
+	entry = &root->lo;
+	if (ecap_ecs(iommu->ecap)) {
+		if (devfn >= 0x80) {
+			devfn -= 0x80;
+			entry = &root->hi;
+		}
+		devfn *= 2;
+	}
+	if (*entry & 1)
+		context = phys_to_virt(*entry & VTD_PAGE_MASK);
+	else {
+		unsigned long phy_addr;
+
+		if (!alloc)
+			return NULL;
+
+		context = alloc_pgtable_page(iommu->node);
+		if (!context)
+			return NULL;
+
+		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
+		phy_addr = virt_to_phys((void *)context);
+		*entry = phy_addr | 1;
+		__iommu_flush_cache(iommu, entry, sizeof(*entry));
+	}
+	return &context[devfn];
+}
+
 static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn)
 {
 	struct dmar_drhd_unit *drhd = NULL;
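
The devfn arithmetic above is the subtle part: each context table still occupies one 4KiB page, but extended context entries are 256 bits wide, so one table covers only 128 functions at a stride of two. A throwaway sketch of just that index math (context_index() and the sample devfn are inventions for illustration; the logic mirrors iommu_context_addr() above when ecap_ecs() is set):

    /* Illustrative only: root-entry half and context-table index under ECS. */
    #include <stdint.h>
    #include <stdio.h>

    static unsigned int context_index(uint8_t devfn, int *use_hi)
    {
        *use_hi = 0;
        if (devfn >= 0x80) {    /* upper half of the bus sits behind root->hi */
            devfn -= 0x80;
            *use_hi = 1;
        }
        return devfn * 2;       /* 256-bit extended entries: stride of two */
    }

    int main(void)
    {
        int hi;
        unsigned int idx = context_index(0x85, &hi);    /* device 16, function 5 */

        printf("devfn 0x85 -> %s table, entry %u\n", hi ? "hi" : "lo", idx);
        return 0;
    }
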
@@ -741,75 +755,36 @@ static void domain_flush_cache(struct dmar_domain *domain,
 	clflush_cache_range(addr, size);
 }
 
-/* Gets context entry for a given bus and devfn */
-static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
-		u8 bus, u8 devfn)
-{
-	struct root_entry *root;
-	struct context_entry *context;
-	unsigned long phy_addr;
-	unsigned long flags;
-
-	spin_lock_irqsave(&iommu->lock, flags);
-	root = &iommu->root_entry[bus];
-	context = get_context_addr_from_root(root);
-	if (!context) {
-		context = (struct context_entry *)
-				alloc_pgtable_page(iommu->node);
-		if (!context) {
-			spin_unlock_irqrestore(&iommu->lock, flags);
-			return NULL;
-		}
-		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
-		phy_addr = virt_to_phys((void *)context);
-		set_root_value(root, phy_addr);
-		set_root_present(root);
-		__iommu_flush_cache(iommu, root, sizeof(*root));
-	}
-	spin_unlock_irqrestore(&iommu->lock, flags);
-	return &context[devfn];
-}
-
 static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
 {
-	struct root_entry *root;
 	struct context_entry *context;
-	int ret;
+	int ret = 0;
 	unsigned long flags;
 
 	spin_lock_irqsave(&iommu->lock, flags);
-	root = &iommu->root_entry[bus];
-	context = get_context_addr_from_root(root);
-	if (!context) {
-		ret = 0;
-		goto out;
-	}
-	ret = context_present(&context[devfn]);
-out:
+	context = iommu_context_addr(iommu, bus, devfn, 0);
+	if (context)
+		ret = context_present(context);
 	spin_unlock_irqrestore(&iommu->lock, flags);
 	return ret;
 }
 
 static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
 {
-	struct root_entry *root;
 	struct context_entry *context;
 	unsigned long flags;
 
 	spin_lock_irqsave(&iommu->lock, flags);
-	root = &iommu->root_entry[bus];
-	context = get_context_addr_from_root(root);
+	context = iommu_context_addr(iommu, bus, devfn, 0);
 	if (context) {
-		context_clear_entry(&context[devfn]);
-		__iommu_flush_cache(iommu, &context[devfn], \
-			sizeof(*context));
+		context_clear_entry(context);
+		__iommu_flush_cache(iommu, context, sizeof(*context));
 	}
 	spin_unlock_irqrestore(&iommu->lock, flags);
 }
 
 static void free_context_table(struct intel_iommu *iommu)
 {
-	struct root_entry *root;
 	int i;
 	unsigned long flags;
 	struct context_entry *context;
@@ -819,10 +794,17 @@ static void free_context_table(struct intel_iommu *iommu)
 		goto out;
 	}
 	for (i = 0; i < ROOT_ENTRY_NR; i++) {
-		root = &iommu->root_entry[i];
-		context = get_context_addr_from_root(root);
+		context = iommu_context_addr(iommu, i, 0, 0);
+		if (context)
+			free_pgtable_page(context);
+
+		if (!ecap_ecs(iommu->ecap))
+			continue;
+
+		context = iommu_context_addr(iommu, i, 0x80, 0);
 		if (context)
 			free_pgtable_page(context);
 	}
 	free_pgtable_page(iommu->root_entry);
 	iommu->root_entry = NULL;
@@ -1146,14 +1128,16 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
 static void iommu_set_root_entry(struct intel_iommu *iommu)
 {
-	void *addr;
+	u64 addr;
 	u32 sts;
 	unsigned long flag;
 
-	addr = iommu->root_entry;
+	addr = virt_to_phys(iommu->root_entry);
+	if (ecap_ecs(iommu->ecap))
+		addr |= DMA_RTADDR_RTT;
 
 	raw_spin_lock_irqsave(&iommu->register_lock, flag);
 
-	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
+	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, addr);
 
 	writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
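
So with this hunk, DMAR_RTADDR_REG receives the physical root-table address with bit 11 (DMA_RTADDR_RTT, added at the end of this series) set whenever extended entries are in use, telling the hardware to interpret root entries in the two-halved format; the bit fits in the low 12 bits left free by the 4KiB alignment. A throwaway userspace sketch of that value composition (the physical address and the ecs flag are invented):

    /* Illustrative only: compose the root-table address register value. */
    #include <stdint.h>
    #include <stdio.h>

    #define DMA_RTADDR_RTT (((uint64_t)1) << 11)   /* root-table type: extended */

    int main(void)
    {
        uint64_t root_phys = 0x7ffde000ULL; /* invented 4KiB-aligned address */
        int ecs = 1;                        /* pretend ecap_ecs() returned 1 */
        uint64_t rtaddr = root_phys;

        if (ecs)
            rtaddr |= DMA_RTADDR_RTT;
        printf("DMAR_RTADDR_REG <- 0x%llx\n", (unsigned long long)rtaddr);
        return 0;
    }
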
@@ -1800,7 +1784,9 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
 	BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
 	       translation != CONTEXT_TT_MULTI_LEVEL);
 
-	context = device_to_context_entry(iommu, bus, devfn);
+	spin_lock_irqsave(&iommu->lock, flags);
+	context = iommu_context_addr(iommu, bus, devfn, 1);
+	spin_unlock_irqrestore(&iommu->lock, flags);
 	if (!context)
 		return -ENOMEM;
 
 	spin_lock_irqsave(&iommu->lock, flags);
@@ -2564,6 +2550,10 @@ static bool device_has_rmrr(struct device *dev)
  * In both cases we assume that PCI USB devices with RMRRs have them largely
  * for historical reasons and that the RMRR space is not actively used post
  * boot.  This exclusion may change if vendors begin to abuse it.
+ *
+ * The same exception is made for graphics devices, with the requirement that
+ * any use of the RMRR regions will be torn down before assigning the device
+ * to a guest.
  */
 static bool device_is_rmrr_locked(struct device *dev)
 {
@@ -2573,7 +2563,7 @@ static bool device_is_rmrr_locked(struct device *dev)
 	if (dev_is_pci(dev)) {
 		struct pci_dev *pdev = to_pci_dev(dev);
 
-		if ((pdev->class >> 8) == PCI_CLASS_SERIAL_USB)
+		if (IS_USB_DEVICE(pdev) || IS_GFX_DEVICE(pdev))
 			return false;
 	}
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c
@@ -637,10 +637,7 @@ static int __init intel_enable_irq_remapping(void)
 	if (x2apic_supported()) {
 		eim = !dmar_x2apic_optout();
 		if (!eim)
-			printk(KERN_WARNING
-				"Your BIOS is broken and requested that x2apic be disabled.\n"
-				"This will slightly decrease performance.\n"
-				"Use 'intremap=no_x2apic_optout' to override BIOS request.\n");
+			pr_info("x2apic is disabled because BIOS sets x2apic opt out bit. You can use 'intremap=no_x2apic_optout' to override the BIOS setting.\n");
 	}
 
 	for_each_iommu(iommu, drhd) {
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -115,10 +115,19 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
 /*
  * Extended Capability Register
  */
 
-#define ecap_niotlb_iunits(e)	((((e) >> 24) & 0xff) + 1)
+#define ecap_pss(e)		((e >> 35) & 0x1f)
+#define ecap_eafs(e)		((e >> 34) & 0x1)
+#define ecap_nwfs(e)		((e >> 33) & 0x1)
+#define ecap_srs(e)		((e >> 31) & 0x1)
+#define ecap_ers(e)		((e >> 30) & 0x1)
+#define ecap_prs(e)		((e >> 29) & 0x1)
+#define ecap_pasid(e)		((e >> 28) & 0x1)
+#define ecap_dis(e)		((e >> 27) & 0x1)
+#define ecap_nest(e)		((e >> 26) & 0x1)
+#define ecap_mts(e)		((e >> 25) & 0x1)
+#define ecap_ecs(e)		((e >> 24) & 0x1)
 #define ecap_iotlb_offset(e)	((((e) >> 8) & 0x3ff) * 16)
-#define ecap_max_iotlb_offset(e) \
-	(ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16)
+#define ecap_max_iotlb_offset(e) (ecap_iotlb_offset(e) + 16)
 #define ecap_coherent(e)	((e) & 0x1)
 #define ecap_qis(e)		((e) & 0x2)
 #define ecap_pass_through(e)	((e >> 6) & 0x1)
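
The old ecap_niotlb_iunits() treated bits 24-31 as a count of IOTLB invalidation units, which is what the "kill bogus ecap_niotlb_iunits()" commit above removes; per the hunk, those bits are individual capability flags in the v2.3 spec, bit 24 being ECS. As a sanity check on the corrected positions, a small userspace sketch that decodes a few of the new fields from an invented register value:

    /* Illustrative only: decode new extended-capability fields. */
    #include <stdint.h>
    #include <stdio.h>

    #define ecap_ecs(e)    ((e >> 24) & 0x1)    /* extended context support */
    #define ecap_pasid(e)  ((e >> 28) & 0x1)    /* PASID support */
    #define ecap_pss(e)    ((e >> 35) & 0x1f)   /* PASID size supported */

    int main(void)
    {
        uint64_t ecap = (1ULL << 24) | (1ULL << 28) | (0x13ULL << 35); /* invented */

        printf("ecs=%d pasid=%d pss=%d\n",
               (int)ecap_ecs(ecap), (int)ecap_pasid(ecap), (int)ecap_pss(ecap));
        return 0;
    }
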
@@ -180,6 +189,9 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
 #define DMA_GSTS_IRES (((u32)1) << 25)
 #define DMA_GSTS_CFIS (((u32)1) << 23)
 
+/* DMA_RTADDR_REG */
+#define DMA_RTADDR_RTT (((u64)1) << 11)
+
 /* CCMD_REG */
 #define DMA_CCMD_ICC (((u64)1) << 63)
 #define DMA_CCMD_GLOBAL_INVL (((u64)1) << 61)