// SPDX-License-Identifier: GPL-2.0-only
/*
 * IOMMU API for ARM architected SMMU implementations.
 *
 * Copyright (C) 2013 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 *
 * This driver currently supports:
 *	- SMMUv1 and v2 implementations
 *	- Stream-matching and stream-indexing
 *	- v7/v8 long-descriptor format
 *	- Non-secure access to the SMMU
 *	- Context fault reporting
 *	- Extended Stream ID (16 bit)
 */

#define pr_fmt(fmt) "arm-smmu: " fmt

#include <linux/acpi.h>
#include <linux/acpi_iort.h>
#include <linux/bitfield.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/iopoll.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_device.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/ratelimit.h>
#include <linux/slab.h>

#include <linux/fsl/mc.h>

#include "arm-smmu.h"
#include "../../dma-iommu.h"

/*
 * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU
 * global register space are still, in fact, using a hypervisor to mediate it
 * by trapping and emulating register accesses. Sadly, some deployed versions
 * of said trapping code have bugs wherein they go horribly wrong for stores
 * using r31 (i.e. XZR/WZR) as the source register.
 */
#define QCOM_DUMMY_VAL -1

#define MSI_IOVA_BASE			0x8000000
#define MSI_IOVA_LENGTH			0x100000

static int force_stage;
module_param(force_stage, int, S_IRUGO);
MODULE_PARM_DESC(force_stage,
	"Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
static bool disable_bypass =
	IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT);
module_param(disable_bypass, bool, S_IRUGO);
MODULE_PARM_DESC(disable_bypass,
	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");

#define s2cr_init_val (struct arm_smmu_s2cr){				\
	.type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,	\
}

static bool using_legacy_binding, using_generic_binding;

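/*
 * Runtime PM helpers: they deliberately do nothing when runtime PM is not
 * enabled for the SMMU device, so SMMUs without a power domain behave
 * exactly as before.
 */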
static inline int arm_smmu_rpm_get(struct arm_smmu_device *smmu)
{
	if (pm_runtime_enabled(smmu->dev))
		return pm_runtime_resume_and_get(smmu->dev);

	return 0;
}

static inline void arm_smmu_rpm_put(struct arm_smmu_device *smmu)
{
	if (pm_runtime_enabled(smmu->dev))
		pm_runtime_put_autosuspend(smmu->dev);
}

static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
{
	return container_of(dom, struct arm_smmu_domain, domain);
}

static struct platform_driver arm_smmu_driver;
static struct iommu_ops arm_smmu_ops;

#ifdef CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS
static struct device_node *dev_get_dev_node(struct device *dev)
{
	if (dev_is_pci(dev)) {
		struct pci_bus *bus = to_pci_dev(dev)->bus;
		while (!pci_is_root_bus(bus))
			bus = bus->parent;
		return of_node_get(bus->bridge->parent->of_node);
	}

	return of_node_get(dev->of_node);
}

static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
{
	*((__be32 *)data) = cpu_to_be32(alias);
	return 0; /* Continue walking */
}

static int __find_legacy_master_phandle(struct device *dev, void *data)
{
	struct of_phandle_iterator *it = *(void **)data;
	struct device_node *np = it->node;
	int err;

	of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
			    "#stream-id-cells", -1)
		if (it->node == np) {
			*(void **)data = dev;
			return 1;
		}
	it->node = np;
	return err == -ENOENT ? 0 : err;
}

static int arm_smmu_register_legacy_master(struct device *dev,
					   struct arm_smmu_device **smmu)
{
	struct device *smmu_dev;
	struct device_node *np;
	struct of_phandle_iterator it;
	void *data = &it;
	u32 *sids;
	__be32 pci_sid;
	int err;

	np = dev_get_dev_node(dev);
	if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
		of_node_put(np);
		return -ENODEV;
	}

	it.node = np;
	err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
				     __find_legacy_master_phandle);
	smmu_dev = data;
	of_node_put(np);
	if (err == 0)
		return -ENODEV;
	if (err < 0)
		return err;
	if (dev_is_pci(dev)) {
		/* "mmu-masters" assumes Stream ID == Requester ID */
		pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
				       &pci_sid);
		it.cur = &pci_sid;
		it.cur_count = 1;
	}

	err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
				&arm_smmu_ops);
	if (err)
		return err;

	sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
	if (!sids)
		return -ENOMEM;
	*smmu = dev_get_drvdata(smmu_dev);
	of_phandle_iterator_args(&it, sids, it.cur_count);
	err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
	kfree(sids);
	return err;
}
#else
static int arm_smmu_register_legacy_master(struct device *dev,
					   struct arm_smmu_device **smmu)
{
	return -ENODEV;
}
#endif /* CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS */

static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
{
	clear_bit(idx, map);
}

/* Wait for any pending TLB invalidations to complete */
static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, int page,
				int sync, int status)
{
	unsigned int spin_cnt, delay;
	u32 reg;

	if (smmu->impl && unlikely(smmu->impl->tlb_sync))
		return smmu->impl->tlb_sync(smmu, page, sync, status);

	arm_smmu_writel(smmu, page, sync, QCOM_DUMMY_VAL);
	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
			reg = arm_smmu_readl(smmu, page, status);
			if (!(reg & ARM_SMMU_sTLBGSTATUS_GSACTIVE))
				return;
			cpu_relax();
		}
		udelay(delay);
	}
	dev_err_ratelimited(smmu->dev,
			    "TLB sync timed out -- SMMU may be deadlocked\n");
}

static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
{
	unsigned long flags;

	spin_lock_irqsave(&smmu->global_sync_lock, flags);
	__arm_smmu_tlb_sync(smmu, ARM_SMMU_GR0, ARM_SMMU_GR0_sTLBGSYNC,
			    ARM_SMMU_GR0_sTLBGSTATUS);
	spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
}

static void arm_smmu_tlb_sync_context(struct arm_smmu_domain *smmu_domain)
{
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	unsigned long flags;

	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
	__arm_smmu_tlb_sync(smmu, ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx),
			    ARM_SMMU_CB_TLBSYNC, ARM_SMMU_CB_TLBSTATUS);
	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
}

static void arm_smmu_tlb_inv_context_s1(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	/*
	 * The TLBI write may be relaxed, so ensure that PTEs cleared by the
	 * current CPU are visible beforehand.
	 */
	wmb();
	arm_smmu_cb_write(smmu_domain->smmu, smmu_domain->cfg.cbndx,
			  ARM_SMMU_CB_S1_TLBIASID, smmu_domain->cfg.asid);
	arm_smmu_tlb_sync_context(smmu_domain);
}

static void arm_smmu_tlb_inv_context_s2(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	/* See above */
	wmb();
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
	arm_smmu_tlb_sync_global(smmu);
}

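/*
 * The by-VA invalidation commands encode their target differently depending
 * on the context format: AArch32 contexts pack the ASID into the low bits of
 * the page-aligned address, while AArch64 contexts use a page-number encoding
 * with the ASID in bits [63:48].
 */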
static void arm_smmu_tlb_inv_range_s1(unsigned long iova, size_t size,
				      size_t granule, void *cookie, int reg)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	int idx = cfg->cbndx;

	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
		wmb();

	if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
		iova = (iova >> 12) << 12;
		iova |= cfg->asid;
		do {
			arm_smmu_cb_write(smmu, idx, reg, iova);
			iova += granule;
		} while (size -= granule);
	} else {
		iova >>= 12;
		iova |= (u64)cfg->asid << 48;
		do {
			arm_smmu_cb_writeq(smmu, idx, reg, iova);
			iova += granule >> 12;
		} while (size -= granule);
	}
}

static void arm_smmu_tlb_inv_range_s2(unsigned long iova, size_t size,
				      size_t granule, void *cookie, int reg)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	int idx = smmu_domain->cfg.cbndx;

	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
		wmb();

	iova >>= 12;
	do {
		if (smmu_domain->cfg.fmt == ARM_SMMU_CTX_FMT_AARCH64)
			arm_smmu_cb_writeq(smmu, idx, reg, iova);
		else
			arm_smmu_cb_write(smmu, idx, reg, iova);
		iova += granule >> 12;
	} while (size -= granule);
}

static void arm_smmu_tlb_inv_walk_s1(unsigned long iova, size_t size,
				     size_t granule, void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;

	if (cfg->flush_walk_prefer_tlbiasid) {
		arm_smmu_tlb_inv_context_s1(cookie);
	} else {
		arm_smmu_tlb_inv_range_s1(iova, size, granule, cookie,
					  ARM_SMMU_CB_S1_TLBIVA);
		arm_smmu_tlb_sync_context(cookie);
	}
}

static void arm_smmu_tlb_add_page_s1(struct iommu_iotlb_gather *gather,
				     unsigned long iova, size_t granule,
				     void *cookie)
{
	arm_smmu_tlb_inv_range_s1(iova, granule, granule, cookie,
				  ARM_SMMU_CB_S1_TLBIVAL);
}

static void arm_smmu_tlb_inv_walk_s2(unsigned long iova, size_t size,
				     size_t granule, void *cookie)
{
	arm_smmu_tlb_inv_range_s2(iova, size, granule, cookie,
				  ARM_SMMU_CB_S2_TLBIIPAS2);
	arm_smmu_tlb_sync_context(cookie);
}

static void arm_smmu_tlb_add_page_s2(struct iommu_iotlb_gather *gather,
				     unsigned long iova, size_t granule,
				     void *cookie)
{
	arm_smmu_tlb_inv_range_s2(iova, granule, granule, cookie,
				  ARM_SMMU_CB_S2_TLBIIPAS2L);
}

static void arm_smmu_tlb_inv_walk_s2_v1(unsigned long iova, size_t size,
					size_t granule, void *cookie)
{
	arm_smmu_tlb_inv_context_s2(cookie);
}
/*
 * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
 * almost negligible, but the benefit of getting the first one in as far ahead
 * of the sync as possible is significant, hence we don't just make this a
 * no-op and call arm_smmu_tlb_inv_context_s2() from .iotlb_sync as you might
 * think.
 */
static void arm_smmu_tlb_add_page_s2_v1(struct iommu_iotlb_gather *gather,
					unsigned long iova, size_t granule,
					void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
		wmb();

	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIVMID, smmu_domain->cfg.vmid);
}

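/*
 * Three sets of TLB maintenance ops: stage 1 invalidates by ASID or VA,
 * stage 2 on SMMUv2 invalidates by IPA, and stage 2 on SMMUv1 can only
 * invalidate the whole VMID.
 */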
static const struct iommu_flush_ops arm_smmu_s1_tlb_ops = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s1,
	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s1,
	.tlb_add_page	= arm_smmu_tlb_add_page_s1,
};

static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v2 = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s2,
	.tlb_add_page	= arm_smmu_tlb_add_page_s2,
};

static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
	.tlb_flush_walk	= arm_smmu_tlb_inv_walk_s2_v1,
	.tlb_add_page	= arm_smmu_tlb_add_page_s2_v1,
};

static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
{
	u32 fsr, fsynr, cbfrsynra;
	unsigned long iova;
	struct iommu_domain *domain = dev;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	int idx = smmu_domain->cfg.cbndx;
	int ret;

	fsr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSR);
	if (!(fsr & ARM_SMMU_FSR_FAULT))
		return IRQ_NONE;

	fsynr = arm_smmu_cb_read(smmu, idx, ARM_SMMU_CB_FSYNR0);
	iova = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_FAR);
	cbfrsynra = arm_smmu_gr1_read(smmu, ARM_SMMU_GR1_CBFRSYNRA(idx));

	ret = report_iommu_fault(domain, NULL, iova,
		fsynr & ARM_SMMU_FSYNR0_WNR ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ);

	if (ret == -ENOSYS)
		dev_err_ratelimited(smmu->dev,
		"Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cbfrsynra=0x%x, cb=%d\n",
			    fsr, iova, fsynr, cbfrsynra, idx);

	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_FSR, fsr);
	return IRQ_HANDLED;
}

static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
{
	u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
	struct arm_smmu_device *smmu = dev;
	static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
				      DEFAULT_RATELIMIT_BURST);

	gfsr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
	gfsynr0 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR0);
	gfsynr1 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR1);
	gfsynr2 = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSYNR2);

	if (!gfsr)
		return IRQ_NONE;

	if (__ratelimit(&rs)) {
		if (IS_ENABLED(CONFIG_ARM_SMMU_DISABLE_BYPASS_BY_DEFAULT) &&
		    (gfsr & ARM_SMMU_sGFSR_USF))
			dev_err(smmu->dev,
				"Blocked unknown Stream ID 0x%hx; boot with \"arm-smmu.disable_bypass=0\" to allow, but this may have security implications\n",
				(u16)gfsynr1);
		else
			dev_err(smmu->dev,
				"Unexpected global fault, this could be serious\n");
		dev_err(smmu->dev,
			"\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
			gfsr, gfsynr0, gfsynr1, gfsynr2);
	}

	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, gfsr);
	return IRQ_HANDLED;
}

static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
				       struct io_pgtable_cfg *pgtbl_cfg)
{
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;

	cb->cfg = cfg;

	/* TCR */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
		} else {
			cb->tcr[0] = arm_smmu_lpae_tcr(pgtbl_cfg);
			cb->tcr[1] = arm_smmu_lpae_tcr2(pgtbl_cfg);
			if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
				cb->tcr[1] |= ARM_SMMU_TCR2_AS;
			else
				cb->tcr[0] |= ARM_SMMU_TCR_EAE;
		}
	} else {
		cb->tcr[0] = arm_smmu_lpae_vtcr(pgtbl_cfg);
	}

	/* TTBRs */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr;
			cb->ttbr[1] = 0;
		} else {
			cb->ttbr[0] = FIELD_PREP(ARM_SMMU_TTBRn_ASID,
						 cfg->asid);
			cb->ttbr[1] = FIELD_PREP(ARM_SMMU_TTBRn_ASID,
						 cfg->asid);

			if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
				cb->ttbr[1] |= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
			else
				cb->ttbr[0] |= pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
		}
	} else {
		cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
	}

	/* MAIRs (stage-1 only) */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
			cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
		} else {
			cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair;
			cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair >> 32;
		}
	}
}

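/*
 * Commit the software state cached in smmu->cbs[idx] to the hardware context
 * bank registers. This is also used on reset to reprogram live banks and to
 * disable banks that have no configuration.
 */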
void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
{
	u32 reg;
	bool stage1;
	struct arm_smmu_cb *cb = &smmu->cbs[idx];
	struct arm_smmu_cfg *cfg = cb->cfg;

	/* Unassigned context banks only need disabling */
	if (!cfg) {
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, 0);
		return;
	}

	stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;

	/* CBA2R */
	if (smmu->version > ARM_SMMU_V1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
			reg = ARM_SMMU_CBA2R_VA64;
		else
			reg = 0;
		/* 16-bit VMIDs live in CBA2R */
		if (smmu->features & ARM_SMMU_FEAT_VMID16)
			reg |= FIELD_PREP(ARM_SMMU_CBA2R_VMID16, cfg->vmid);

		arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBA2R(idx), reg);
	}

	/* CBAR */
	reg = FIELD_PREP(ARM_SMMU_CBAR_TYPE, cfg->cbar);
	if (smmu->version < ARM_SMMU_V2)
		reg |= FIELD_PREP(ARM_SMMU_CBAR_IRPTNDX, cfg->irptndx);

	/*
	 * Use the weakest shareability/memory types, so they are
	 * overridden by the ttbcr/pte.
	 */
	if (stage1) {
		reg |= FIELD_PREP(ARM_SMMU_CBAR_S1_BPSHCFG,
				  ARM_SMMU_CBAR_S1_BPSHCFG_NSH) |
		       FIELD_PREP(ARM_SMMU_CBAR_S1_MEMATTR,
				  ARM_SMMU_CBAR_S1_MEMATTR_WB);
	} else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
		/* 8-bit VMIDs live in CBAR */
		reg |= FIELD_PREP(ARM_SMMU_CBAR_VMID, cfg->vmid);
	}
	arm_smmu_gr1_write(smmu, ARM_SMMU_GR1_CBAR(idx), reg);

	/*
	 * TCR
	 * We must write this before the TTBRs, since it determines the
	 * access behaviour of some fields (in particular, ASID[15:8]).
	 */
	if (stage1 && smmu->version > ARM_SMMU_V1)
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR2, cb->tcr[1]);
	arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TCR, cb->tcr[0]);

	/* TTBRs */
	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_CONTEXTIDR, cfg->asid);
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_TTBR1, cb->ttbr[1]);
	} else {
		arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR0, cb->ttbr[0]);
		if (stage1)
			arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_TTBR1,
					   cb->ttbr[1]);
	}

	/* MAIRs (stage-1 only) */
	if (stage1) {
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR0, cb->mair[0]);
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_S1_MAIR1, cb->mair[1]);
	}

	/* SCTLR */
	reg = ARM_SMMU_SCTLR_CFIE | ARM_SMMU_SCTLR_CFRE | ARM_SMMU_SCTLR_AFE |
	      ARM_SMMU_SCTLR_TRE | ARM_SMMU_SCTLR_M;
	if (stage1)
		reg |= ARM_SMMU_SCTLR_S1_ASIDPNE;
	if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
		reg |= ARM_SMMU_SCTLR_E;

	if (smmu->impl && smmu->impl->write_sctlr)
		smmu->impl->write_sctlr(smmu, idx, reg);
	else
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_SCTLR, reg);
}

static int arm_smmu_alloc_context_bank(struct arm_smmu_domain *smmu_domain,
				       struct arm_smmu_device *smmu,
				       struct device *dev, unsigned int start)
{
	if (smmu->impl && smmu->impl->alloc_context_bank)
		return smmu->impl->alloc_context_bank(smmu_domain, smmu, dev, start);

	return __arm_smmu_alloc_bitmap(smmu->context_map, start, smmu->num_context_banks);
}

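/*
 * Set up a context for the domain on first attach: pick the translation
 * stage and page-table format, allocate a context bank, build the io-pgtable
 * and finally request the context fault interrupt.
 */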
static int arm_smmu_init_domain_context(struct iommu_domain *domain,
					struct arm_smmu_device *smmu,
					struct device *dev)
{
	int irq, start, ret = 0;
	unsigned long ias, oas;
	struct io_pgtable_ops *pgtbl_ops;
	struct io_pgtable_cfg pgtbl_cfg;
	enum io_pgtable_fmt fmt;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	irqreturn_t (*context_fault)(int irq, void *dev);

	mutex_lock(&smmu_domain->init_mutex);
	if (smmu_domain->smmu)
		goto out_unlock;

	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
		smmu_domain->smmu = smmu;
		goto out_unlock;
	}

	/*
	 * Mapping the requested stage onto what we support is surprisingly
	 * complicated, mainly because the spec allows S1+S2 SMMUs without
	 * support for nested translation. That means we end up with the
	 * following table:
	 *
	 * Requested        Supported        Actual
	 *     S1               N              S1
	 *     S1             S1+S2            S1
	 *     S1               S2             S2
	 *     S1               S1             S1
	 *     N                N              N
	 *     N              S1+S2            S2
	 *     N                S2             S2
	 *     N                S1             S1
	 *
	 * Note that you can't actually request stage-2 mappings.
	 */
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;

	/*
	 * Choosing a suitable context format is even more fiddly. Until we
	 * grow some way for the caller to express a preference, and/or move
	 * the decision into the io-pgtable code where it arguably belongs,
	 * just aim for the closest thing to the rest of the system, and hope
	 * that the hardware isn't esoteric enough that we can't assume AArch64
	 * support to be a superset of AArch32 support...
	 */
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
	if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
	    !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
	    (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
	    (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
	if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
	    (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
			       ARM_SMMU_FEAT_FMT_AARCH64_16K |
			       ARM_SMMU_FEAT_FMT_AARCH64_4K)))
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;

	if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
		ret = -EINVAL;
		goto out_unlock;
	}

	switch (smmu_domain->stage) {
	case ARM_SMMU_DOMAIN_S1:
		cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
		start = smmu->num_s2_context_banks;
		ias = smmu->va_size;
		oas = smmu->ipa_size;
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
			fmt = ARM_64_LPAE_S1;
		} else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
			fmt = ARM_32_LPAE_S1;
			ias = min(ias, 32UL);
			oas = min(oas, 40UL);
		} else {
			fmt = ARM_V7S;
			ias = min(ias, 32UL);
			oas = min(oas, 32UL);
		}
		smmu_domain->flush_ops = &arm_smmu_s1_tlb_ops;
		break;
	case ARM_SMMU_DOMAIN_NESTED:
		/*
		 * We will likely want to change this if/when KVM gets
		 * involved.
		 */
	case ARM_SMMU_DOMAIN_S2:
		cfg->cbar = CBAR_TYPE_S2_TRANS;
		start = 0;
		ias = smmu->ipa_size;
		oas = smmu->pa_size;
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
			fmt = ARM_64_LPAE_S2;
		} else {
			fmt = ARM_32_LPAE_S2;
			ias = min(ias, 40UL);
			oas = min(oas, 40UL);
		}
		if (smmu->version == ARM_SMMU_V2)
			smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v2;
		else
			smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v1;
		break;
	default:
		ret = -EINVAL;
		goto out_unlock;
	}

	ret = arm_smmu_alloc_context_bank(smmu_domain, smmu, dev, start);
	if (ret < 0) {
		goto out_unlock;
	}

	smmu_domain->smmu = smmu;

	cfg->cbndx = ret;
	if (smmu->version < ARM_SMMU_V2) {
		cfg->irptndx = atomic_inc_return(&smmu->irptndx);
		cfg->irptndx %= smmu->num_context_irqs;
	} else {
		cfg->irptndx = cfg->cbndx;
	}

	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
		cfg->vmid = cfg->cbndx + 1;
	else
		cfg->asid = cfg->cbndx;

	pgtbl_cfg = (struct io_pgtable_cfg) {
		.pgsize_bitmap	= smmu->pgsize_bitmap,
		.ias		= ias,
		.oas		= oas,
		.coherent_walk	= smmu->features & ARM_SMMU_FEAT_COHERENT_WALK,
		.tlb		= smmu_domain->flush_ops,
		.iommu_dev	= smmu->dev,
	};

	if (smmu->impl && smmu->impl->init_context) {
		ret = smmu->impl->init_context(smmu_domain, &pgtbl_cfg, dev);
		if (ret)
			goto out_clear_smmu;
	}

	if (smmu_domain->pgtbl_quirks)
		pgtbl_cfg.quirks |= smmu_domain->pgtbl_quirks;

	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
	if (!pgtbl_ops) {
		ret = -ENOMEM;
		goto out_clear_smmu;
	}

	/* Update the domain's page sizes to reflect the page table format */
	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;

	if (pgtbl_cfg.quirks & IO_PGTABLE_QUIRK_ARM_TTBR1) {
		domain->geometry.aperture_start = ~0UL << ias;
		domain->geometry.aperture_end = ~0UL;
	} else {
		domain->geometry.aperture_end = (1UL << ias) - 1;
	}

	domain->geometry.force_aperture = true;

	/* Initialise the context bank with our page table cfg */
	arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
	arm_smmu_write_context_bank(smmu, cfg->cbndx);

	/*
	 * Request context fault interrupt. Do this last to avoid the
	 * handler seeing a half-initialised domain state.
	 */
	irq = smmu->irqs[cfg->irptndx];

	if (smmu->impl && smmu->impl->context_fault)
		context_fault = smmu->impl->context_fault;
	else
		context_fault = arm_smmu_context_fault;

	ret = devm_request_irq(smmu->dev, irq, context_fault,
			       IRQF_SHARED, "arm-smmu-context-fault", domain);
	if (ret < 0) {
		dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
			cfg->irptndx, irq);
		cfg->irptndx = ARM_SMMU_INVALID_IRPTNDX;
	}

	mutex_unlock(&smmu_domain->init_mutex);

	/* Publish page table ops for map/unmap */
	smmu_domain->pgtbl_ops = pgtbl_ops;
	return 0;

out_clear_smmu:
	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
	smmu_domain->smmu = NULL;
out_unlock:
	mutex_unlock(&smmu_domain->init_mutex);
	return ret;
}

static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	int ret, irq;

	if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
		return;

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		return;

	/*
	 * Disable the context bank and free the page tables before freeing
	 * it.
	 */
	smmu->cbs[cfg->cbndx].cfg = NULL;
	arm_smmu_write_context_bank(smmu, cfg->cbndx);

	if (cfg->irptndx != ARM_SMMU_INVALID_IRPTNDX) {
		irq = smmu->irqs[cfg->irptndx];
		devm_free_irq(smmu->dev, irq, domain);
	}

	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);

	arm_smmu_rpm_put(smmu);
}

static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
{
	struct arm_smmu_domain *smmu_domain;

	if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_IDENTITY) {
		if (using_legacy_binding ||
		    (type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_DMA_FQ))
			return NULL;
	}
	/*
	 * Allocate the domain and initialise some of its data structures.
	 * We can't really do anything meaningful until we've added a
	 * master.
	 */
	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
	if (!smmu_domain)
		return NULL;

	mutex_init(&smmu_domain->init_mutex);
	spin_lock_init(&smmu_domain->cb_lock);

	return &smmu_domain->domain;
}

static void arm_smmu_domain_free(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	/*
	 * Free the domain resources. We assume that all devices have
	 * already been detached.
	 */
	arm_smmu_destroy_domain_context(domain);
	kfree(smmu_domain);
}

static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
{
	struct arm_smmu_smr *smr = smmu->smrs + idx;
	u32 reg = FIELD_PREP(ARM_SMMU_SMR_ID, smr->id) |
		  FIELD_PREP(ARM_SMMU_SMR_MASK, smr->mask);

	if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
		reg |= ARM_SMMU_SMR_VALID;
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(idx), reg);
}

static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
{
	struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
	u32 reg;

	if (smmu->impl && smmu->impl->write_s2cr) {
		smmu->impl->write_s2cr(smmu, idx);
		return;
	}

	reg = FIELD_PREP(ARM_SMMU_S2CR_TYPE, s2cr->type) |
	      FIELD_PREP(ARM_SMMU_S2CR_CBNDX, s2cr->cbndx) |
	      FIELD_PREP(ARM_SMMU_S2CR_PRIVCFG, s2cr->privcfg);
	if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
	    smmu->smrs[idx].valid)
		reg |= ARM_SMMU_S2CR_EXIDVALID;
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_S2CR(idx), reg);
}

static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
{
	arm_smmu_write_s2cr(smmu, idx);
	if (smmu->smrs)
		arm_smmu_write_smr(smmu, idx);
}

/*
 * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
 * should be called after sCR0 is written.
 */
static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
{
	u32 smr;
	int i;

	if (!smmu->smrs)
		return;
	/*
	 * If we've had to accommodate firmware memory regions, we may
	 * have live SMRs by now; tread carefully...
	 *
	 * Somewhat perversely, not having a free SMR for this test implies we
	 * can get away without it anyway, as we'll only be able to 'allocate'
	 * these SMRs for the ID/mask values we're already trusting to be OK.
	 */
	for (i = 0; i < smmu->num_mapping_groups; i++)
		if (!smmu->smrs[i].valid)
			goto smr_ok;
	return;
smr_ok:
	/*
	 * SMR.ID bits may not be preserved if the corresponding MASK
	 * bits are set, so check each one separately. We can reject
	 * masters later if they try to claim IDs outside these masks.
	 */
	smr = FIELD_PREP(ARM_SMMU_SMR_ID, smmu->streamid_mask);
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
	smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
	smmu->streamid_mask = FIELD_GET(ARM_SMMU_SMR_ID, smr);

	smr = FIELD_PREP(ARM_SMMU_SMR_MASK, smmu->streamid_mask);
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_SMR(i), smr);
	smr = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_SMR(i));
	smmu->smr_mask_mask = FIELD_GET(ARM_SMMU_SMR_MASK, smr);
}

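/*
 * Find a stream map entry for the given ID/mask pair: reuse an existing SMR
 * that entirely covers it, reject any partial overlap (which could alias
 * other stream IDs), or fall back to the first free entry.
 */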
static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
{
	struct arm_smmu_smr *smrs = smmu->smrs;
	int i, free_idx = -ENOSPC;

	/* Stream indexing is blissfully easy */
	if (!smrs)
		return id;

	/* Validating SMRs is... less so */
	for (i = 0; i < smmu->num_mapping_groups; ++i) {
		if (!smrs[i].valid) {
			/*
			 * Note the first free entry we come across, which
			 * we'll claim in the end if nothing else matches.
			 */
			if (free_idx < 0)
				free_idx = i;
			continue;
		}
		/*
		 * If the new entry is _entirely_ matched by an existing entry,
		 * then reuse that, with the guarantee that there also cannot
		 * be any subsequent conflicting entries. In normal use we'd
		 * expect simply identical entries for this case, but there's
		 * no harm in accommodating the generalisation.
		 */
		if ((mask & smrs[i].mask) == mask &&
		    !((id ^ smrs[i].id) & ~smrs[i].mask))
			return i;
		/*
		 * If the new entry has any other overlap with an existing one,
		 * though, then there always exists at least one stream ID
		 * which would cause a conflict, and we can't allow that risk.
		 */
		if (!((id ^ smrs[i].id) & ~(smrs[i].mask | mask)))
			return -EINVAL;
	}
	return free_idx;
}

static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
{
	if (--smmu->s2crs[idx].count)
		return false;

	smmu->s2crs[idx] = s2cr_init_val;
	if (smmu->smrs)
		smmu->smrs[idx].valid = false;

	return true;
}

static int arm_smmu_master_alloc_smes(struct device *dev)
{
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
	struct arm_smmu_device *smmu = cfg->smmu;
	struct arm_smmu_smr *smrs = smmu->smrs;
	int i, idx, ret;

	mutex_lock(&smmu->stream_map_mutex);
	/* Figure out a viable stream map entry allocation */
	for_each_cfg_sme(cfg, fwspec, i, idx) {
		u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
		u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]);

		if (idx != INVALID_SMENDX) {
			ret = -EEXIST;
			goto out_err;
		}

		ret = arm_smmu_find_sme(smmu, sid, mask);
		if (ret < 0)
			goto out_err;

		idx = ret;
		if (smrs && smmu->s2crs[idx].count == 0) {
			smrs[idx].id = sid;
			smrs[idx].mask = mask;
			smrs[idx].valid = true;
		}
		smmu->s2crs[idx].count++;
		cfg->smendx[i] = (s16)idx;
	}

	/* It worked! Now, poke the actual hardware */
	for_each_cfg_sme(cfg, fwspec, i, idx)
		arm_smmu_write_sme(smmu, idx);

	mutex_unlock(&smmu->stream_map_mutex);
	return 0;

out_err:
	while (i--) {
		arm_smmu_free_sme(smmu, cfg->smendx[i]);
		cfg->smendx[i] = INVALID_SMENDX;
	}
	mutex_unlock(&smmu->stream_map_mutex);
	return ret;
}

static void arm_smmu_master_free_smes(struct arm_smmu_master_cfg *cfg,
				      struct iommu_fwspec *fwspec)
{
	struct arm_smmu_device *smmu = cfg->smmu;
	int i, idx;

	mutex_lock(&smmu->stream_map_mutex);
	for_each_cfg_sme(cfg, fwspec, i, idx) {
		if (arm_smmu_free_sme(smmu, idx))
			arm_smmu_write_sme(smmu, idx);
		cfg->smendx[i] = INVALID_SMENDX;
	}
	mutex_unlock(&smmu->stream_map_mutex);
}

static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
				      struct arm_smmu_master_cfg *cfg,
				      struct iommu_fwspec *fwspec)
{
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_s2cr *s2cr = smmu->s2crs;
	u8 cbndx = smmu_domain->cfg.cbndx;
	enum arm_smmu_s2cr_type type;
	int i, idx;

	if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
		type = S2CR_TYPE_BYPASS;
	else
		type = S2CR_TYPE_TRANS;

	for_each_cfg_sme(cfg, fwspec, i, idx) {
		if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
			continue;

		s2cr[idx].type = type;
		s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
		s2cr[idx].cbndx = cbndx;
		arm_smmu_write_s2cr(smmu, idx);
	}
	return 0;
}

static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_master_cfg *cfg;
	struct arm_smmu_device *smmu;
	int ret;

	if (!fwspec || fwspec->ops != &arm_smmu_ops) {
		dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
		return -ENXIO;
	}

	/*
	 * FIXME: The arch/arm DMA API code tries to attach devices to its own
	 * domains between of_xlate() and probe_device() - we have no way to cope
	 * with that, so until ARM gets converted to rely on groups and default
	 * domains, just say no (but more politely than by dereferencing NULL).
	 * This should be at least a WARN_ON once that's sorted.
	 */
	cfg = dev_iommu_priv_get(dev);
	if (!cfg)
		return -ENODEV;

	smmu = cfg->smmu;

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		return ret;

	/* Ensure that the domain is finalised */
	ret = arm_smmu_init_domain_context(domain, smmu, dev);
	if (ret < 0)
		goto rpm_put;

	/*
	 * Sanity check the domain. We don't support domains across
	 * different SMMUs.
	 */
	if (smmu_domain->smmu != smmu) {
		dev_err(dev,
			"cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
			dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
		ret = -EINVAL;
		goto rpm_put;
	}

	/* Looks ok, so add the device to the domain */
	ret = arm_smmu_domain_add_master(smmu_domain, cfg, fwspec);

	/*
	 * Setup an autosuspend delay to avoid bouncing runpm state.
	 * Otherwise, if a driver for a suspended consumer device
	 * unmaps buffers, it will runpm resume/suspend for each one.
	 *
	 * For example, when used by a GPU device, when an application
	 * or game exits, it can trigger unmapping 100s or 1000s of
	 * buffers.  With a runpm cycle for each buffer, that adds up
	 * to 5-10sec worth of reprogramming the context bank, while
	 * the system appears to be locked up to the user.
	 */
	pm_runtime_set_autosuspend_delay(smmu->dev, 20);
	pm_runtime_use_autosuspend(smmu->dev);

rpm_put:
	arm_smmu_rpm_put(smmu);
	return ret;
}

static int arm_smmu_map_pages(struct iommu_domain *domain, unsigned long iova,
			      phys_addr_t paddr, size_t pgsize, size_t pgcount,
			      int prot, gfp_t gfp, size_t *mapped)
{
	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
	int ret;

	if (!ops)
		return -ENODEV;

	arm_smmu_rpm_get(smmu);
	ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
	arm_smmu_rpm_put(smmu);

	return ret;
}

static size_t arm_smmu_unmap_pages(struct iommu_domain *domain, unsigned long iova,
				   size_t pgsize, size_t pgcount,
				   struct iommu_iotlb_gather *iotlb_gather)
{
	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
	struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
	size_t ret;

	if (!ops)
		return 0;

	arm_smmu_rpm_get(smmu);
	ret = ops->unmap_pages(ops, iova, pgsize, pgcount, iotlb_gather);
	arm_smmu_rpm_put(smmu);

	return ret;
}

static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	if (smmu_domain->flush_ops) {
		arm_smmu_rpm_get(smmu);
		smmu_domain->flush_ops->tlb_flush_all(smmu_domain);
		arm_smmu_rpm_put(smmu);
	}
}

static void arm_smmu_iotlb_sync(struct iommu_domain *domain,
				struct iommu_iotlb_gather *gather)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;

	if (!smmu)
		return;

	arm_smmu_rpm_get(smmu);
	if (smmu->version == ARM_SMMU_V2 ||
	    smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
		arm_smmu_tlb_sync_context(smmu_domain);
	else
		arm_smmu_tlb_sync_global(smmu);
	arm_smmu_rpm_put(smmu);
}

static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
					      dma_addr_t iova)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
	struct device *dev = smmu->dev;
	void __iomem *reg;
	u32 tmp;
	u64 phys;
	unsigned long va, flags;
	int ret, idx = cfg->cbndx;
	phys_addr_t addr = 0;

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		return 0;

	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
	va = iova & ~0xfffUL;
	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
		arm_smmu_cb_writeq(smmu, idx, ARM_SMMU_CB_ATS1PR, va);
	else
		arm_smmu_cb_write(smmu, idx, ARM_SMMU_CB_ATS1PR, va);

	reg = arm_smmu_page(smmu, ARM_SMMU_CB(smmu, idx)) + ARM_SMMU_CB_ATSR;
	if (readl_poll_timeout_atomic(reg, tmp, !(tmp & ARM_SMMU_ATSR_ACTIVE),
				      5, 50)) {
		spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
		dev_err(dev,
			"iova to phys timed out on %pad. Falling back to software table walk.\n",
			&iova);
		arm_smmu_rpm_put(smmu);
		return ops->iova_to_phys(ops, iova);
	}

	phys = arm_smmu_cb_readq(smmu, idx, ARM_SMMU_CB_PAR);
	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
	if (phys & ARM_SMMU_CB_PAR_F) {
		dev_err(dev, "translation fault!\n");
		dev_err(dev, "PAR = 0x%llx\n", phys);
		goto out;
	}

	addr = (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
out:
	arm_smmu_rpm_put(smmu);

	return addr;
}

static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
					dma_addr_t iova)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;

	if (!ops)
		return 0;

	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
			smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
		return arm_smmu_iova_to_phys_hard(domain, iova);

	return ops->iova_to_phys(ops, iova);
}

static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap)
{
	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);

	switch (cap) {
	case IOMMU_CAP_CACHE_COHERENCY:
		/* Assume that a coherent TCU implies coherent TBUs */
		return cfg->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
	case IOMMU_CAP_NOEXEC:
		return true;
	default:
		return false;
	}
}

static
struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
{
	struct device *dev = driver_find_device_by_fwnode(&arm_smmu_driver.driver,
							  fwnode);
	put_device(dev);
	return dev ? dev_get_drvdata(dev) : NULL;
}

static struct iommu_device *arm_smmu_probe_device(struct device *dev)
{
	struct arm_smmu_device *smmu = NULL;
	struct arm_smmu_master_cfg *cfg;
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	int i, ret;

	if (using_legacy_binding) {
		ret = arm_smmu_register_legacy_master(dev, &smmu);

		/*
		 * If dev->iommu_fwspec is initally NULL, arm_smmu_register_legacy_master()
		 * will allocate/initialise a new one. Thus we need to update fwspec for
		 * later use.
		 */
		fwspec = dev_iommu_fwspec_get(dev);
		if (ret)
			goto out_free;
	} else if (fwspec && fwspec->ops == &arm_smmu_ops) {
		smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
	} else {
		return ERR_PTR(-ENODEV);
	}

	ret = -EINVAL;
	for (i = 0; i < fwspec->num_ids; i++) {
		u16 sid = FIELD_GET(ARM_SMMU_SMR_ID, fwspec->ids[i]);
		u16 mask = FIELD_GET(ARM_SMMU_SMR_MASK, fwspec->ids[i]);

		if (sid & ~smmu->streamid_mask) {
			dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
				sid, smmu->streamid_mask);
			goto out_free;
		}
		if (mask & ~smmu->smr_mask_mask) {
			dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
				mask, smmu->smr_mask_mask);
			goto out_free;
		}
	}

	ret = -ENOMEM;
	cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
		      GFP_KERNEL);
	if (!cfg)
		goto out_free;

	cfg->smmu = smmu;
	dev_iommu_priv_set(dev, cfg);
	while (i--)
		cfg->smendx[i] = INVALID_SMENDX;

	ret = arm_smmu_rpm_get(smmu);
	if (ret < 0)
		goto out_cfg_free;

	ret = arm_smmu_master_alloc_smes(dev);
	arm_smmu_rpm_put(smmu);

	if (ret)
		goto out_cfg_free;

	device_link_add(dev, smmu->dev,
			DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_SUPPLIER);

	return &smmu->iommu;

out_cfg_free:
	kfree(cfg);
out_free:
	iommu_fwspec_free(dev);
	return ERR_PTR(ret);
}

static void arm_smmu_release_device(struct device *dev)
{
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
	int ret;

	ret = arm_smmu_rpm_get(cfg->smmu);
	if (ret < 0)
		return;

	arm_smmu_master_free_smes(cfg, fwspec);

	arm_smmu_rpm_put(cfg->smmu);

	dev_iommu_priv_set(dev, NULL);
	kfree(cfg);
}

static void arm_smmu_probe_finalize(struct device *dev)
{
	struct arm_smmu_master_cfg *cfg;
	struct arm_smmu_device *smmu;

	cfg = dev_iommu_priv_get(dev);
	smmu = cfg->smmu;

	if (smmu->impl && smmu->impl->probe_finalize)
		smmu->impl->probe_finalize(smmu, dev);
}

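/*
 * Masters that share a stream map entry (e.g. behind the same SMR mask) must
 * also share an iommu_group; stash the group pointer in each S2CR so that
 * subsequent lookups can reuse it.
 */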
static struct iommu_group *arm_smmu_device_group(struct device *dev)
{
	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct arm_smmu_device *smmu = cfg->smmu;
	struct iommu_group *group = NULL;
	int i, idx;

	mutex_lock(&smmu->stream_map_mutex);
	for_each_cfg_sme(cfg, fwspec, i, idx) {
		if (group && smmu->s2crs[idx].group &&
		    group != smmu->s2crs[idx].group) {
			mutex_unlock(&smmu->stream_map_mutex);
			return ERR_PTR(-EINVAL);
		}

		group = smmu->s2crs[idx].group;
	}

	if (group) {
		mutex_unlock(&smmu->stream_map_mutex);
		return iommu_group_ref_get(group);
	}

	if (dev_is_pci(dev))
		group = pci_device_group(dev);
	else if (dev_is_fsl_mc(dev))
		group = fsl_mc_device_group(dev);
	else
		group = generic_device_group(dev);

	/* Remember group for faster lookups */
	if (!IS_ERR(group))
		for_each_cfg_sme(cfg, fwspec, i, idx)
			smmu->s2crs[idx].group = group;

	mutex_unlock(&smmu->stream_map_mutex);
	return group;
}

static int arm_smmu_enable_nesting(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	int ret = 0;

	mutex_lock(&smmu_domain->init_mutex);
	if (smmu_domain->smmu)
		ret = -EPERM;
	else
		smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
	mutex_unlock(&smmu_domain->init_mutex);

	return ret;
}

static int arm_smmu_set_pgtable_quirks(struct iommu_domain *domain,
		unsigned long quirks)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	int ret = 0;

	mutex_lock(&smmu_domain->init_mutex);
	if (smmu_domain->smmu)
		ret = -EPERM;
	else
		smmu_domain->pgtbl_quirks = quirks;
	mutex_unlock(&smmu_domain->init_mutex);

	return ret;
}

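/*
 * Translate the firmware-provided Stream ID (plus optional mask, either from
 * the "iommus" specifier or a "stream-match-mask" property) into the packed
 * 32-bit ID format used by the fwspec.
 */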
static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
{
	u32 mask, fwid = 0;

	if (args->args_count > 0)
		fwid |= FIELD_PREP(ARM_SMMU_SMR_ID, args->args[0]);

	if (args->args_count > 1)
		fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, args->args[1]);
	else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
		fwid |= FIELD_PREP(ARM_SMMU_SMR_MASK, mask);

	return iommu_fwspec_add_ids(dev, &fwid, 1);
}

static void arm_smmu_get_resv_regions(struct device *dev,
				      struct list_head *head)
{
	struct iommu_resv_region *region;
	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;

	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
					 prot, IOMMU_RESV_SW_MSI, GFP_KERNEL);
	if (!region)
		return;

	list_add_tail(&region->list, head);

	iommu_dma_get_resv_regions(dev, head);
}

static int arm_smmu_def_domain_type(struct device *dev)
{
	struct arm_smmu_master_cfg *cfg = dev_iommu_priv_get(dev);
	const struct arm_smmu_impl *impl = cfg->smmu->impl;

	if (using_legacy_binding)
		return IOMMU_DOMAIN_IDENTITY;

	if (impl && impl->def_domain_type)
		return impl->def_domain_type(dev);

	return 0;
}

static struct iommu_ops arm_smmu_ops = {
	.capable		= arm_smmu_capable,
	.domain_alloc		= arm_smmu_domain_alloc,
	.probe_device		= arm_smmu_probe_device,
	.release_device		= arm_smmu_release_device,
	.probe_finalize		= arm_smmu_probe_finalize,
	.device_group		= arm_smmu_device_group,
	.of_xlate		= arm_smmu_of_xlate,
	.get_resv_regions	= arm_smmu_get_resv_regions,
	.def_domain_type	= arm_smmu_def_domain_type,
	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
	.owner			= THIS_MODULE,
	.default_domain_ops = &(const struct iommu_domain_ops) {
		.attach_dev		= arm_smmu_attach_dev,
		.map_pages		= arm_smmu_map_pages,
		.unmap_pages		= arm_smmu_unmap_pages,
		.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
		.iotlb_sync		= arm_smmu_iotlb_sync,
		.iova_to_phys		= arm_smmu_iova_to_phys,
		.enable_nesting		= arm_smmu_enable_nesting,
		.set_pgtable_quirks	= arm_smmu_set_pgtable_quirks,
		.free			= arm_smmu_domain_free,
	}
};

static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
{
	int i;
	u32 reg;

	/* clear global FSR */
	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sGFSR);
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sGFSR, reg);

	/*
	 * Reset stream mapping groups: Initial values mark all SMRn as
	 * invalid and all S2CRn as bypass unless overridden.
	 */
	for (i = 0; i < smmu->num_mapping_groups; ++i)
		arm_smmu_write_sme(smmu, i);

	/* Make sure all context banks are disabled and clear CB_FSR  */
	for (i = 0; i < smmu->num_context_banks; ++i) {
		arm_smmu_write_context_bank(smmu, i);
		arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_FSR, ARM_SMMU_FSR_FAULT);
	}

	/* Invalidate the TLB, just in case */
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLH, QCOM_DUMMY_VAL);
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_TLBIALLNSNH, QCOM_DUMMY_VAL);

	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0);

	/* Enable fault reporting */
	reg |= (ARM_SMMU_sCR0_GFRE | ARM_SMMU_sCR0_GFIE |
		ARM_SMMU_sCR0_GCFGFRE | ARM_SMMU_sCR0_GCFGFIE);

	/* Disable TLB broadcasting. */
	reg |= (ARM_SMMU_sCR0_VMIDPNE | ARM_SMMU_sCR0_PTM);

	/* Enable client access, handling unmatched streams as appropriate */
	reg &= ~ARM_SMMU_sCR0_CLIENTPD;
	if (disable_bypass)
		reg |= ARM_SMMU_sCR0_USFCFG;
	else
		reg &= ~ARM_SMMU_sCR0_USFCFG;

	/* Disable forced broadcasting */
	reg &= ~ARM_SMMU_sCR0_FB;

	/* Don't upgrade barriers */
	reg &= ~(ARM_SMMU_sCR0_BSU);

	if (smmu->features & ARM_SMMU_FEAT_VMID16)
		reg |= ARM_SMMU_sCR0_VMID16EN;

	if (smmu->features & ARM_SMMU_FEAT_EXIDS)
		reg |= ARM_SMMU_sCR0_EXIDENABLE;

	if (smmu->impl && smmu->impl->reset)
		smmu->impl->reset(smmu);

	/* Push the button */
	arm_smmu_tlb_sync_global(smmu);
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, reg);
}

static int arm_smmu_id_size_to_bits(int size)
{
	switch (size) {
	case 0:
		return 32;
	case 1:
		return 36;
	case 2:
		return 40;
	case 3:
		return 42;
	case 4:
		return 44;
	case 5:
	default:
		return 48;
	}
}

static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
{
	unsigned int size;
	u32 id;
	bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
	int i, ret;

	dev_notice(smmu->dev, "probing hardware configuration...\n");
	dev_notice(smmu->dev, "SMMUv%d with:\n",
			smmu->version == ARM_SMMU_V2 ? 2 : 1);

	/* ID0 */
	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID0);

	/* Restrict available stages based on module parameter */
	if (force_stage == 1)
		id &= ~(ARM_SMMU_ID0_S2TS | ARM_SMMU_ID0_NTS);
	else if (force_stage == 2)
		id &= ~(ARM_SMMU_ID0_S1TS | ARM_SMMU_ID0_NTS);

	if (id & ARM_SMMU_ID0_S1TS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
		dev_notice(smmu->dev, "\tstage 1 translation\n");
	}

	if (id & ARM_SMMU_ID0_S2TS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
		dev_notice(smmu->dev, "\tstage 2 translation\n");
	}

	if (id & ARM_SMMU_ID0_NTS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
		dev_notice(smmu->dev, "\tnested translation\n");
	}

	if (!(smmu->features &
		(ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
		dev_err(smmu->dev, "\tno translation support!\n");
		return -ENODEV;
	}

	if ((id & ARM_SMMU_ID0_S1TS) &&
	    ((smmu->version < ARM_SMMU_V2) || !(id & ARM_SMMU_ID0_ATOSNS))) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
		dev_notice(smmu->dev, "\taddress translation ops\n");
	}

	/*
	 * In order for DMA API calls to work properly, we must defer to what
	 * the FW says about coherency, regardless of what the hardware claims.
	 * Fortunately, this also opens up a workaround for systems where the
	 * ID register value has ended up configured incorrectly.
	 */
	cttw_reg = !!(id & ARM_SMMU_ID0_CTTW);
	if (cttw_fw || cttw_reg)
		dev_notice(smmu->dev, "\t%scoherent table walk\n",
			   cttw_fw ? "" : "non-");
	if (cttw_fw != cttw_reg)
		dev_notice(smmu->dev,
			   "\t(IDR0.CTTW overridden by FW configuration)\n");

	/* Max. number of entries we have for stream matching/indexing */
	if (smmu->version == ARM_SMMU_V2 && id & ARM_SMMU_ID0_EXIDS) {
		smmu->features |= ARM_SMMU_FEAT_EXIDS;
		size = 1 << 16;
	} else {
		size = 1 << FIELD_GET(ARM_SMMU_ID0_NUMSIDB, id);
	}
	smmu->streamid_mask = size - 1;
	if (id & ARM_SMMU_ID0_SMS) {
		smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
		size = FIELD_GET(ARM_SMMU_ID0_NUMSMRG, id);
		if (size == 0) {
			dev_err(smmu->dev,
				"stream-matching supported, but no SMRs present!\n");
			return -ENODEV;
		}

		/* Zero-initialised to mark as invalid */
		smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
					  GFP_KERNEL);
		if (!smmu->smrs)
			return -ENOMEM;

1750
		dev_notice(smmu->dev,
1751
			   "\tstream matching with %u register groups", size);
1752
	}
1753 1754 1755 1756 1757 1758 1759 1760
	/* s2cr->type == 0 means translation, so initialise explicitly */
	smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
					 GFP_KERNEL);
	if (!smmu->s2crs)
		return -ENOMEM;
	for (i = 0; i < size; i++)
		smmu->s2crs[i] = s2cr_init_val;

1761
	smmu->num_mapping_groups = size;
1762
	mutex_init(&smmu->stream_map_mutex);
1763
	spin_lock_init(&smmu->global_sync_lock);
1764

1765 1766
	if (smmu->version < ARM_SMMU_V2 ||
	    !(id & ARM_SMMU_ID0_PTFS_NO_AARCH32)) {
1767
		smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
1768
		if (!(id & ARM_SMMU_ID0_PTFS_NO_AARCH32S))
1769 1770 1771
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
	}

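	/*
	 * ID1 describes the register map layout: the global page size, how
	 * many page-sized register groups make up the global space, and the
	 * number of translation context banks.
	 */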
	/* ID1 */
	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID1);
	smmu->pgshift = (id & ARM_SMMU_ID1_PAGESIZE) ? 16 : 12;

	/* Check for size mismatch of SMMU address space from mapped region */
	size = 1 << (FIELD_GET(ARM_SMMU_ID1_NUMPAGENDXB, id) + 1);
	if (smmu->numpage != 2 * size << smmu->pgshift)
		dev_warn(smmu->dev,
			"SMMU address space size (0x%x) differs from mapped region size (0x%x)!\n",
			2 * size << smmu->pgshift, smmu->numpage);
	/* Now properly encode NUMPAGE to subsequently derive SMMU_CB_BASE */
	smmu->numpage = size;

	smmu->num_s2_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMS2CB, id);
	smmu->num_context_banks = FIELD_GET(ARM_SMMU_ID1_NUMCB, id);
	if (smmu->num_s2_context_banks > smmu->num_context_banks) {
		dev_err(smmu->dev, "impossible number of S2 context banks!\n");
		return -ENODEV;
	}
	dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
		   smmu->num_context_banks, smmu->num_s2_context_banks);
	smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
				 sizeof(*smmu->cbs), GFP_KERNEL);
	if (!smmu->cbs)
		return -ENOMEM;

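	/*
	 * ID2 describes the supported input/output address sizes and which
	 * translation granules the hardware implements.
	 */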
	/* ID2 */
	id = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_ID2);
	size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_IAS, id));
	smmu->ipa_size = size;

	/* The output mask is also applied for bypass */
	size = arm_smmu_id_size_to_bits(FIELD_GET(ARM_SMMU_ID2_OAS, id));
	smmu->pa_size = size;

	if (id & ARM_SMMU_ID2_VMID16)
		smmu->features |= ARM_SMMU_FEAT_VMID16;

	/*
	 * What the page table walker can address actually depends on which
	 * descriptor format is in use, but since a) we don't know that yet,
	 * and b) it can vary per context bank, this will have to do...
	 */
	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
		dev_warn(smmu->dev,
			 "failed to set DMA mask for table walker\n");

	if (smmu->version < ARM_SMMU_V2) {
		smmu->va_size = smmu->ipa_size;
		if (smmu->version == ARM_SMMU_V1_64K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
	} else {
		size = FIELD_GET(ARM_SMMU_ID2_UBS, id);
		smmu->va_size = arm_smmu_id_size_to_bits(size);
		if (id & ARM_SMMU_ID2_PTFS_4K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
		if (id & ARM_SMMU_ID2_PTFS_16K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
		if (id & ARM_SMMU_ID2_PTFS_64K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
	}

	if (smmu->impl && smmu->impl->cfg_probe) {
		ret = smmu->impl->cfg_probe(smmu);
		if (ret)
			return ret;
	}

	/* Now we've corralled the various formats, what'll it do? */
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
		smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
	if (smmu->features &
	    (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;

	if (arm_smmu_ops.pgsize_bitmap == -1UL)
		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
	else
		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
	dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
		   smmu->pgsize_bitmap);

	if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
		dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
			   smmu->va_size, smmu->ipa_size);

	if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
		dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
			   smmu->ipa_size, smmu->pa_size);

	return 0;
}

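/*
 * Per-compatible match data: pairs the architecture version with the
 * implementation so that impl-specific quirks can be hooked up later.
 */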
struct arm_smmu_match_data {
	enum arm_smmu_arch_version version;
	enum arm_smmu_implementation model;
};

#define ARM_SMMU_MATCH_DATA(name, ver, imp)	\
static const struct arm_smmu_match_data name = { .version = ver, .model = imp }

ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
ARM_SMMU_MATCH_DATA(qcom_smmuv2, ARM_SMMU_V2, QCOM_SMMUV2);

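/*
 * A minimal, purely illustrative generic-binding node (addresses and
 * interrupt specifiers made up, not taken from any real platform) that
 * would be matched by the table below:
 *
 *	smmu: iommu@40000000 {
 *		compatible = "arm,mmu-500";
 *		reg = <0x40000000 0x10000>;
 *		#global-interrupts = <1>;
 *		#iommu-cells = <1>;
 *		interrupts = <GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>,
 *			     <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>;
 *	};
 */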
static const struct of_device_id arm_smmu_of_match[] = {
	{ .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
	{ .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
	{ .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
	{ .compatible = "arm,mmu-401", .data = &arm_mmu401 },
	{ .compatible = "arm,mmu-500", .data = &arm_mmu500 },
	{ .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
	{ .compatible = "nvidia,smmu-500", .data = &arm_mmu500 },
	{ .compatible = "qcom,smmu-v2", .data = &qcom_smmuv2 },
	{ },
};
MODULE_DEVICE_TABLE(of, arm_smmu_of_match);

#ifdef CONFIG_ACPI
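/* Translate the IORT SMMU model field into this driver's version/model. */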
static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
{
	int ret = 0;

	switch (model) {
	case ACPI_IORT_SMMU_V1:
	case ACPI_IORT_SMMU_CORELINK_MMU400:
		smmu->version = ARM_SMMU_V1;
		smmu->model = GENERIC_SMMU;
		break;
	case ACPI_IORT_SMMU_CORELINK_MMU401:
		smmu->version = ARM_SMMU_V1_64K;
		smmu->model = GENERIC_SMMU;
		break;
	case ACPI_IORT_SMMU_V2:
		smmu->version = ARM_SMMU_V2;
		smmu->model = GENERIC_SMMU;
		break;
	case ACPI_IORT_SMMU_CORELINK_MMU500:
		smmu->version = ARM_SMMU_V2;
		smmu->model = ARM_MMU500;
		break;
	case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
		smmu->version = ARM_SMMU_V2;
		smmu->model = CAVIUM_SMMUV2;
		break;
	default:
		ret = -ENODEV;
	}

	return ret;
}

static int arm_smmu_device_acpi_probe(struct arm_smmu_device *smmu,
				      u32 *global_irqs, u32 *pmu_irqs)
{
	struct device *dev = smmu->dev;
	struct acpi_iort_node *node =
		*(struct acpi_iort_node **)dev_get_platdata(dev);
	struct acpi_iort_smmu *iort_smmu;
	int ret;

	/* Retrieve SMMU1/2 specific data */
	iort_smmu = (struct acpi_iort_smmu *)node->node_data;

	ret = acpi_smmu_get_data(iort_smmu->model, smmu);
	if (ret < 0)
		return ret;

	/* Ignore the configuration access interrupt */
	*global_irqs = 1;
	*pmu_irqs = 0;

	if (iort_smmu->flags & ACPI_IORT_SMMU_COHERENT_WALK)
		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;

	return 0;
}
#else
static inline int arm_smmu_device_acpi_probe(struct arm_smmu_device *smmu,
					     u32 *global_irqs, u32 *pmu_irqs)
{
	return -ENODEV;
}
#endif

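/*
 * Parse the generic binding: "#global-interrupts" gives the number of
 * global fault interrupts, and the presence of the deprecated
 * "mmu-masters" property decides whether the legacy or the generic probe
 * path is used (mixing the two across SMMUs is not supported).
 */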
static int arm_smmu_device_dt_probe(struct arm_smmu_device *smmu,
				    u32 *global_irqs, u32 *pmu_irqs)
{
	const struct arm_smmu_match_data *data;
	struct device *dev = smmu->dev;
	bool legacy_binding;

	if (of_property_read_u32(dev->of_node, "#global-interrupts", global_irqs))
		return dev_err_probe(dev, -ENODEV,
				     "missing #global-interrupts property\n");
	*pmu_irqs = 0;

	data = of_device_get_match_data(dev);
	smmu->version = data->version;
	smmu->model = data->model;

	legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
	if (legacy_binding && !using_generic_binding) {
		if (!using_legacy_binding) {
			pr_notice("deprecated \"mmu-masters\" DT property in use; %s support unavailable\n",
				  IS_ENABLED(CONFIG_ARM_SMMU_LEGACY_DT_BINDINGS) ? "DMA API" : "SMMU");
		}
		using_legacy_binding = true;
	} else if (!legacy_binding && !using_legacy_binding) {
		using_generic_binding = true;
	} else {
		dev_err(dev, "not probing due to mismatched DT properties\n");
		return -ENODEV;
	}

	if (of_dma_is_coherent(dev->of_node))
		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;

	return 0;
}

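/*
 * Firmware may describe reserved memory regions (RMRs) for StreamIDs that
 * are already doing DMA set up before the kernel takes over. Claim stream
 * mapping entries for those StreamIDs and mark them bypass so the reset
 * that follows probe does not cut them off.
 */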
static void arm_smmu_rmr_install_bypass_smr(struct arm_smmu_device *smmu)
{
	struct list_head rmr_list;
	struct iommu_resv_region *e;
	int idx, cnt = 0;
	u32 reg;

	INIT_LIST_HEAD(&rmr_list);
	iort_get_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);

	/*
	 * Rather than trying to look at existing mappings that
	 * are setup by the firmware and then invalidate the ones
	 * that do no have matching RMR entries, just disable the
	 * SMMU until it gets enabled again in the reset routine.
	 */
	reg = arm_smmu_gr0_read(smmu, ARM_SMMU_GR0_sCR0);
	reg |= ARM_SMMU_sCR0_CLIENTPD;
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, reg);

	list_for_each_entry(e, &rmr_list, list) {
		struct iommu_iort_rmr_data *rmr;
		int i;

		rmr = container_of(e, struct iommu_iort_rmr_data, rr);
		for (i = 0; i < rmr->num_sids; i++) {
			idx = arm_smmu_find_sme(smmu, rmr->sids[i], ~0);
			if (idx < 0)
				continue;

			if (smmu->s2crs[idx].count == 0) {
				smmu->smrs[idx].id = rmr->sids[i];
				smmu->smrs[idx].mask = 0;
				smmu->smrs[idx].valid = true;
			}
			smmu->s2crs[idx].count++;
			smmu->s2crs[idx].type = S2CR_TYPE_BYPASS;
			smmu->s2crs[idx].privcfg = S2CR_PRIVCFG_DEFAULT;

			cnt++;
		}
	}

	dev_notice(smmu->dev, "\tpreserved %d boot mapping%s\n", cnt,
		   cnt == 1 ? "" : "s");
	iort_put_rmr_sids(dev_fwnode(smmu->dev), &rmr_list);
}

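/*
 * Probe flow: parse DT/ACPI for the IRQ layout, map the register space,
 * wire up clocks and interrupts, read the ID registers, register with the
 * IOMMU core, then install any RMR bypass entries before resetting the
 * hardware.
 */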
static int arm_smmu_device_probe(struct platform_device *pdev)
{
	struct resource *res;
	struct arm_smmu_device *smmu;
	struct device *dev = &pdev->dev;
	int num_irqs, i, err;
	u32 global_irqs, pmu_irqs;
	irqreturn_t (*global_fault)(int irq, void *dev);

	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
	if (!smmu) {
		dev_err(dev, "failed to allocate arm_smmu_device\n");
		return -ENOMEM;
	}
	smmu->dev = dev;

	if (dev->of_node)
		err = arm_smmu_device_dt_probe(smmu, &global_irqs, &pmu_irqs);
	else
		err = arm_smmu_device_acpi_probe(smmu, &global_irqs, &pmu_irqs);
	if (err)
		return err;

	smmu->base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
	if (IS_ERR(smmu->base))
		return PTR_ERR(smmu->base);
	smmu->ioaddr = res->start;

	/*
	 * The resource size should effectively match the value of SMMU_TOP;
	 * stash that temporarily until we know PAGESIZE to validate it with.
	 */
	smmu->numpage = resource_size(res);

	smmu = arm_smmu_impl_init(smmu);
	if (IS_ERR(smmu))
		return PTR_ERR(smmu);

	num_irqs = platform_irq_count(pdev);

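	/*
	 * Interrupts are laid out as: global fault IRQs first, then any PMU
	 * IRQs, then one IRQ per context bank.
	 */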
	smmu->num_context_irqs = num_irqs - global_irqs - pmu_irqs;
	if (smmu->num_context_irqs <= 0)
		return dev_err_probe(dev, -ENODEV,
				"found %d interrupts but expected at least %d\n",
				num_irqs, global_irqs + pmu_irqs + 1);

	smmu->irqs = devm_kcalloc(dev, smmu->num_context_irqs,
				  sizeof(*smmu->irqs), GFP_KERNEL);
	if (!smmu->irqs)
		return dev_err_probe(dev, -ENOMEM, "failed to allocate %d irqs\n",
				     smmu->num_context_irqs);

	for (i = 0; i < smmu->num_context_irqs; i++) {
		int irq = platform_get_irq(pdev, global_irqs + pmu_irqs + i);

		if (irq < 0)
			return irq;
		smmu->irqs[i] = irq;
	}

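	/* Enable any clocks the SMMU needs before poking its registers. */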
	err = devm_clk_bulk_get_all(dev, &smmu->clks);
	if (err < 0) {
		dev_err(dev, "failed to get clocks %d\n", err);
		return err;
	}
	smmu->num_clks = err;

	err = clk_bulk_prepare_enable(smmu->num_clks, smmu->clks);
	if (err)
		return err;

	err = arm_smmu_device_cfg_probe(smmu);
	if (err)
		return err;

	if (smmu->version == ARM_SMMU_V2) {
		if (smmu->num_context_banks > smmu->num_context_irqs) {
			dev_err(dev,
			      "found only %d context irq(s) but %d required\n",
			      smmu->num_context_irqs, smmu->num_context_banks);
			return -ENODEV;
		}

		/* Ignore superfluous interrupts */
		smmu->num_context_irqs = smmu->num_context_banks;
	}

	if (smmu->impl && smmu->impl->global_fault)
		global_fault = smmu->impl->global_fault;
	else
		global_fault = arm_smmu_global_fault;

	for (i = 0; i < global_irqs; i++) {
		int irq = platform_get_irq(pdev, i);

		if (irq < 0)
			return irq;

		err = devm_request_irq(dev, irq, global_fault, IRQF_SHARED,
				       "arm-smmu global fault", smmu);
		if (err)
			return dev_err_probe(dev, err,
					"failed to request global IRQ %d (%u)\n",
					i, irq);
	}

	err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
				     "smmu.%pa", &smmu->ioaddr);
	if (err) {
		dev_err(dev, "Failed to register iommu in sysfs\n");
		return err;
	}

	err = iommu_device_register(&smmu->iommu, &arm_smmu_ops, dev);
	if (err) {
		dev_err(dev, "Failed to register iommu\n");
		iommu_device_sysfs_remove(&smmu->iommu);
		return err;
	}

	platform_set_drvdata(pdev, smmu);

	/* Check for RMRs and install bypass SMRs if any */
	arm_smmu_rmr_install_bypass_smr(smmu);

	arm_smmu_device_reset(smmu);
	arm_smmu_test_smr_masks(smmu);

	/*
	 * We want to avoid touching dev->power.lock in fastpaths unless
	 * it's really going to do something useful - pm_runtime_enabled()
	 * can serve as an ideal proxy for that decision. So, conditionally
	 * enable pm_runtime.
	 */
	if (dev->pm_domain) {
		pm_runtime_set_active(dev);
		pm_runtime_enable(dev);
	}

	return 0;
}

static int arm_smmu_device_remove(struct platform_device *pdev)
{
	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);

	if (!smmu)
		return -ENODEV;

	if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
		dev_notice(&pdev->dev, "disabling translation\n");

	iommu_device_unregister(&smmu->iommu);
	iommu_device_sysfs_remove(&smmu->iommu);

	arm_smmu_rpm_get(smmu);
	/* Turn the thing off */
	arm_smmu_gr0_write(smmu, ARM_SMMU_GR0_sCR0, ARM_SMMU_sCR0_CLIENTPD);
	arm_smmu_rpm_put(smmu);

	if (pm_runtime_enabled(smmu->dev))
		pm_runtime_force_suspend(smmu->dev);
	else
		clk_bulk_disable(smmu->num_clks, smmu->clks);

	clk_bulk_unprepare(smmu->num_clks, smmu->clks);
	return 0;
}

static void arm_smmu_device_shutdown(struct platform_device *pdev)
{
	arm_smmu_device_remove(pdev);
}

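/*
 * Runtime PM simply gates the SMMU clocks; resume re-runs the reset
 * sequence since the implementation may have lost register state while
 * suspended.
 */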
static int __maybe_unused arm_smmu_runtime_resume(struct device *dev)
{
	struct arm_smmu_device *smmu = dev_get_drvdata(dev);
	int ret;

	ret = clk_bulk_enable(smmu->num_clks, smmu->clks);
	if (ret)
		return ret;

	arm_smmu_device_reset(smmu);

	return 0;
}

static int __maybe_unused arm_smmu_runtime_suspend(struct device *dev)
{
	struct arm_smmu_device *smmu = dev_get_drvdata(dev);

	clk_bulk_disable(smmu->num_clks, smmu->clks);

	return 0;
}

static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
{
	int ret;
	struct arm_smmu_device *smmu = dev_get_drvdata(dev);

	ret = clk_bulk_prepare(smmu->num_clks, smmu->clks);
	if (ret)
		return ret;

	if (pm_runtime_suspended(dev))
		return 0;

	ret = arm_smmu_runtime_resume(dev);
	if (ret)
		clk_bulk_unprepare(smmu->num_clks, smmu->clks);

	return ret;
}

static int __maybe_unused arm_smmu_pm_suspend(struct device *dev)
{
	int ret = 0;
	struct arm_smmu_device *smmu = dev_get_drvdata(dev);

	if (pm_runtime_suspended(dev))
		goto clk_unprepare;

	ret = arm_smmu_runtime_suspend(dev);
	if (ret)
		return ret;

clk_unprepare:
	clk_bulk_unprepare(smmu->num_clks, smmu->clks);
	return ret;
}

static const struct dev_pm_ops arm_smmu_pm_ops = {
	SET_SYSTEM_SLEEP_PM_OPS(arm_smmu_pm_suspend, arm_smmu_pm_resume)
	SET_RUNTIME_PM_OPS(arm_smmu_runtime_suspend,
			   arm_smmu_runtime_resume, NULL)
};

static struct platform_driver arm_smmu_driver = {
	.driver	= {
		.name			= "arm-smmu",
		.of_match_table		= arm_smmu_of_match,
		.pm			= &arm_smmu_pm_ops,
		.suppress_bind_attrs    = true,
	},
	.probe	= arm_smmu_device_probe,
	.remove	= arm_smmu_device_remove,
	.shutdown = arm_smmu_device_shutdown,
};
module_platform_driver(arm_smmu_driver);

MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
MODULE_AUTHOR("Will Deacon <will@kernel.org>");
MODULE_ALIAS("platform:arm-smmu");
MODULE_LICENSE("GPL v2");