diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt index c03eec1168721bdb1c851f7c42eba34ec48f4ba7..3443e0f838dfc8a53e548527a05cf9892f2c2a92 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt +++ b/Documentation/devicetree/bindings/iommu/arm,smmu-v3.txt @@ -35,3 +35,6 @@ the PCIe specification. NOTE: this only applies to the SMMU itself, not masters connected upstream of the SMMU. + +- hisilicon,broken-prefetch-cmd + : Avoid sending CMD_PREFETCH_* commands to the SMMU. diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 8e9ec81ce4bbd85473d6d6a35e7c3567569187ee..da902baaa7946aac569b7ebe8a316c647dfd8187 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -199,9 +199,10 @@ * Stream table. * * Linear: Enough to cover 1 << IDR1.SIDSIZE entries - * 2lvl: 8k L1 entries, 256 lazy entries per table (each table covers a PCI bus) + * 2lvl: 128k L1 entries, + * 256 lazy entries per table (each table covers a PCI bus) */ -#define STRTAB_L1_SZ_SHIFT 16 +#define STRTAB_L1_SZ_SHIFT 20 #define STRTAB_SPLIT 8 #define STRTAB_L1_DESC_DWORDS 1 @@ -269,10 +270,10 @@ #define ARM64_TCR_TG0_SHIFT 14 #define ARM64_TCR_TG0_MASK 0x3UL #define CTXDESC_CD_0_TCR_IRGN0_SHIFT 8 -#define ARM64_TCR_IRGN0_SHIFT 24 +#define ARM64_TCR_IRGN0_SHIFT 8 #define ARM64_TCR_IRGN0_MASK 0x3UL #define CTXDESC_CD_0_TCR_ORGN0_SHIFT 10 -#define ARM64_TCR_ORGN0_SHIFT 26 +#define ARM64_TCR_ORGN0_SHIFT 10 #define ARM64_TCR_ORGN0_MASK 0x3UL #define CTXDESC_CD_0_TCR_SH0_SHIFT 12 #define ARM64_TCR_SH0_SHIFT 12 @@ -542,6 +543,9 @@ struct arm_smmu_device { #define ARM_SMMU_FEAT_HYP (1 << 12) u32 features; +#define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0) + u32 options; + struct arm_smmu_cmdq cmdq; struct arm_smmu_evtq evtq; struct arm_smmu_priq priq; @@ -602,11 +606,35 @@ struct arm_smmu_domain { static DEFINE_SPINLOCK(arm_smmu_devices_lock); static LIST_HEAD(arm_smmu_devices); +struct arm_smmu_option_prop { + u32 opt; + const char *prop; +}; + +static struct arm_smmu_option_prop arm_smmu_options[] = { + { ARM_SMMU_OPT_SKIP_PREFETCH, "hisilicon,broken-prefetch-cmd" }, + { 0, NULL}, +}; + static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom) { return container_of(dom, struct arm_smmu_domain, domain); } +static void parse_driver_options(struct arm_smmu_device *smmu) +{ + int i = 0; + + do { + if (of_property_read_bool(smmu->dev->of_node, + arm_smmu_options[i].prop)) { + smmu->options |= arm_smmu_options[i].opt; + dev_notice(smmu->dev, "option %s\n", + arm_smmu_options[i].prop); + } + } while (arm_smmu_options[++i].opt); +} + /* Low-level queue manipulation functions */ static bool queue_full(struct arm_smmu_queue *q) { @@ -1036,7 +1064,8 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, arm_smmu_sync_ste_for_sid(smmu, sid); /* It's likely that we'll want to use the new STE soon */ - arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd); + if (!(smmu->options & ARM_SMMU_OPT_SKIP_PREFETCH)) + arm_smmu_cmdq_issue_cmd(smmu, &prefetch_cmd); } static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent) @@ -1064,7 +1093,7 @@ static int arm_smmu_init_l2_strtab(struct arm_smmu_device *smmu, u32 sid) return 0; size = 1 << (STRTAB_SPLIT + ilog2(STRTAB_STE_DWORDS) + 3); - strtab = &cfg->strtab[sid >> STRTAB_SPLIT << STRTAB_L1_DESC_DWORDS]; + strtab = &cfg->strtab[(sid >> STRTAB_SPLIT) * STRTAB_L1_DESC_DWORDS]; desc->span = STRTAB_SPLIT + 1; desc->l2ptr = dma_zalloc_coherent(smmu->dev, size, &desc->l2ptr_dma, @@ -2020,21 +2049,23 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) { void *strtab; u64 reg; - u32 size; + u32 size, l1size; int ret; struct arm_smmu_strtab_cfg *cfg = &smmu->strtab_cfg; /* Calculate the L1 size, capped to the SIDSIZE */ size = STRTAB_L1_SZ_SHIFT - (ilog2(STRTAB_L1_DESC_DWORDS) + 3); size = min(size, smmu->sid_bits - STRTAB_SPLIT); - if (size + STRTAB_SPLIT < smmu->sid_bits) + cfg->num_l1_ents = 1 << size; + + size += STRTAB_SPLIT; + if (size < smmu->sid_bits) dev_warn(smmu->dev, "2-level strtab only covers %u/%u bits of SID\n", - size + STRTAB_SPLIT, smmu->sid_bits); + size, smmu->sid_bits); - cfg->num_l1_ents = 1 << size; - size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3); - strtab = dma_zalloc_coherent(smmu->dev, size, &cfg->strtab_dma, + l1size = cfg->num_l1_ents * (STRTAB_L1_DESC_DWORDS << 3); + strtab = dma_zalloc_coherent(smmu->dev, l1size, &cfg->strtab_dma, GFP_KERNEL); if (!strtab) { dev_err(smmu->dev, @@ -2055,8 +2086,7 @@ static int arm_smmu_init_strtab_2lvl(struct arm_smmu_device *smmu) ret = arm_smmu_init_l1_strtab(smmu); if (ret) dma_free_coherent(smmu->dev, - cfg->num_l1_ents * - (STRTAB_L1_DESC_DWORDS << 3), + l1size, strtab, cfg->strtab_dma); return ret; @@ -2573,6 +2603,8 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev) if (irq > 0) smmu->gerr_irq = irq; + parse_driver_options(smmu); + /* Probe the h/w */ ret = arm_smmu_device_probe(smmu); if (ret) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index a98a7b27aca1dec2cb2f53319df8a49abcf8e645..0649b94f59584ca5b885cd0ecad595a84af0d89d 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1830,8 +1830,9 @@ static int domain_init(struct dmar_domain *domain, int guest_width) static void domain_exit(struct dmar_domain *domain) { + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; struct page *freelist = NULL; - int i; /* Domain 0 is reserved, so dont process it */ if (!domain) @@ -1851,8 +1852,10 @@ static void domain_exit(struct dmar_domain *domain) /* clear attached or cached domains */ rcu_read_lock(); - for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) - iommu_detach_domain(domain, g_iommus[i]); + for_each_active_iommu(iommu, drhd) + if (domain_type_is_vm(domain) || + test_bit(iommu->seq_id, domain->iommu_bmp)) + iommu_detach_domain(domain, iommu); rcu_read_unlock(); dma_free_pagelist(freelist);