arm-smmu.c 61.2 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
/*
 * IOMMU API for ARM architected SMMU implementations.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) 2013 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 *
 * This driver currently supports:
 *	- SMMUv1 and v2 implementations
 *	- Stream-matching and stream-indexing
 *	- v7/v8 long-descriptor format
 *	- Non-secure access to the SMMU
 *	- Context fault reporting
 *	- Extended Stream ID (16 bit)
 */

#define pr_fmt(fmt) "arm-smmu: " fmt

32 33
#include <linux/acpi.h>
#include <linux/acpi_iort.h>
34
#include <linux/atomic.h>
35
#include <linux/delay.h>
36
#include <linux/dma-iommu.h>
37 38 39 40
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/io.h>
41
#include <linux/io-64-nonatomic-hi-lo.h>
42
#include <linux/iommu.h>
43
#include <linux/iopoll.h>
44 45
#include <linux/init.h>
#include <linux/moduleparam.h>
46
#include <linux/of.h>
47
#include <linux/of_address.h>
48
#include <linux/of_device.h>
49
#include <linux/of_iommu.h>
50
#include <linux/pci.h>
51 52 53 54 55
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

#include <linux/amba/bus.h>
56
#include <linux/fsl/mc.h>
57

58
#include "io-pgtable.h"
59 60 61 62 63
#include "arm-smmu-regs.h"

/* Bit definitions named after the MMU-500 ACTLR and ACR registers */
#define ARM_MMU500_ACTLR_CPRE		(1 << 1)

#define ARM_MMU500_ACR_CACHE_LOCK	(1 << 26)
#define ARM_MMU500_ACR_S2CRB_TLBEN	(1 << 10)
#define ARM_MMU500_ACR_SMTNMB_TLBEN	(1 << 8)

/*
 * TLB sync polling parameters: the status register is read up to
 * TLB_SPIN_COUNT times between delays, and the delay doubles from 1us
 * until it reaches TLB_LOOP_TIMEOUT.
 */
#define TLB_LOOP_TIMEOUT		1000000	/* 1s! */
#define TLB_SPIN_COUNT			10

/* Maximum number of context banks per SMMU */
#define ARM_SMMU_MAX_CBS		128

/* SMMU global address space: GR1 sits one (1 << pgshift) page above GR0 */
#define ARM_SMMU_GR0(smmu)		((smmu)->base)
#define ARM_SMMU_GR1(smmu)		((smmu)->base + (1 << (smmu)->pgshift))
76

77 78 79 80 81 82 83 84 85 86
/*
 * SMMU global address space with conditional offset to access secure
 * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
 * nsGFSYNR0: 0x450)
 *
 * Evaluates to (smmu)->base, plus 0x400 when the device has
 * ARM_SMMU_OPT_SECURE_CFG_ACCESS set in its options. The argument is
 * parenthesised on every use so that an expression such as "p + 1" or
 * "&array[i]" expands correctly.
 */
#define ARM_SMMU_GR0_NS(smmu)						\
	((smmu)->base +							\
		(((smmu)->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS)	\
			? 0x400 : 0))

87 88 89 90 91
/*
 * Some 64-bit registers only make sense to write atomically, but in such
 * cases all the data relevant to AArch32 formats lies within the lower word,
 * therefore this actually makes more sense than it might first appear.
 *
 * Builds without CONFIG_64BIT fall back to a single 32-bit write.
 */
#ifndef CONFIG_64BIT
#define smmu_write_atomic_lq		writel_relaxed
#else
#define smmu_write_atomic_lq		writeq_relaxed
#endif

98
/* Translation context bank */
99
#define ARM_SMMU_CB(smmu, n)	((smmu)->cb_base + ((n) << (smmu)->pgshift))
100

101 102 103
#define MSI_IOVA_BASE			0x8000000
#define MSI_IOVA_LENGTH			0x100000

104
static int force_stage;
105 106 107 108
/*
 * not really modular, but the easiest way to keep compat with existing
 * bootargs behaviour is to continue using module_param() here.
 */
109
module_param(force_stage, int, S_IRUGO);
110 111
MODULE_PARM_DESC(force_stage,
	"Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
112 113 114 115
static bool disable_bypass;
module_param(disable_bypass, bool, S_IRUGO);
MODULE_PARM_DESC(disable_bypass,
	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");
116

117
/*
 * Architecture revision of an SMMU instance. The enumerators are ordered,
 * and the driver relies on that ordering for comparisons such as
 * "version > ARM_SMMU_V1" and "version < ARM_SMMU_V2".
 */
enum arm_smmu_arch_version {
	ARM_SMMU_V1,
	ARM_SMMU_V1_64K,
	ARM_SMMU_V2,
};

123 124
/* Specific SMMU implementation, stored in arm_smmu_device::model */
enum arm_smmu_implementation {
	GENERIC_SMMU,
	ARM_MMU500,
	CAVIUM_SMMUV2,
};

129
struct arm_smmu_s2cr {
130 131
	struct iommu_group		*group;
	int				count;
132 133 134 135 136 137 138 139 140
	enum arm_smmu_s2cr_type		type;
	enum arm_smmu_s2cr_privcfg	privcfg;
	u8				cbndx;
};

#define s2cr_init_val (struct arm_smmu_s2cr){				\
	.type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,	\
}

141 142 143
struct arm_smmu_smr {
	u16				mask;
	u16				id;
144
	bool				valid;
145 146
};

147 148 149 150 151 152 153
/*
 * Cached register state for one context bank. Filled in by
 * arm_smmu_init_context_bank() and written to hardware by
 * arm_smmu_write_context_bank().
 */
struct arm_smmu_cb {
	u64				ttbr[2];	/* TTBR0 / TTBR1 values */
	u32				tcr[2];		/* TTBCR / TTBCR2 values */
	u32				mair[2];	/* MAIR0 / MAIR1 (stage 1 only) */
	struct arm_smmu_cfg		*cfg;		/* NULL while the bank is unassigned */
};

154
struct arm_smmu_master_cfg {
155
	struct arm_smmu_device		*smmu;
156
	s16				smendx[];
157
};
158
#define INVALID_SMENDX			-1
159 160
#define __fwspec_cfg(fw) ((struct arm_smmu_master_cfg *)fw->iommu_priv)
#define fwspec_smmu(fw)  (__fwspec_cfg(fw)->smmu)
161 162
#define fwspec_smendx(fw, i) \
	(i >= fw->num_ids ? INVALID_SMENDX : __fwspec_cfg(fw)->smendx[i])
163
#define for_each_cfg_sme(fw, i, idx) \
164
	for (i = 0; idx = fwspec_smendx(fw, i), i < fw->num_ids; ++i)
165 166 167 168 169

/* Driver state for one SMMU instance */
struct arm_smmu_device {
	struct device			*dev;

	/* Register windows; see ARM_SMMU_GR0/GR1/CB for how they are used */
	void __iomem			*base;
	void __iomem			*cb_base;
	unsigned long			pgshift;

/* Bits of the features word */
#define ARM_SMMU_FEAT_COHERENT_WALK	(1 << 0)
#define ARM_SMMU_FEAT_STREAM_MATCH	(1 << 1)
#define ARM_SMMU_FEAT_TRANS_S1		(1 << 2)
#define ARM_SMMU_FEAT_TRANS_S2		(1 << 3)
#define ARM_SMMU_FEAT_TRANS_NESTED	(1 << 4)
#define ARM_SMMU_FEAT_TRANS_OPS		(1 << 5)
#define ARM_SMMU_FEAT_VMID16		(1 << 6)
#define ARM_SMMU_FEAT_FMT_AARCH64_4K	(1 << 7)
#define ARM_SMMU_FEAT_FMT_AARCH64_16K	(1 << 8)
#define ARM_SMMU_FEAT_FMT_AARCH64_64K	(1 << 9)
#define ARM_SMMU_FEAT_FMT_AARCH32_L	(1 << 10)
#define ARM_SMMU_FEAT_FMT_AARCH32_S	(1 << 11)
#define ARM_SMMU_FEAT_EXIDS		(1 << 12)
	u32				features;

/* Bits of the options word (set from DT by parse_driver_options()) */
#define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
	u32				options;
	enum arm_smmu_arch_version	version;
	enum arm_smmu_implementation	model;

	/* Context banks: counts, allocation bitmap and cached register state */
	u32				num_context_banks;
	u32				num_s2_context_banks;
	DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
	struct arm_smmu_cb		*cbs;
	/* Incremented to pick a context IRQ index on pre-v2 hardware */
	atomic_t			irptndx;

	/* Stream mapping state */
	u32				num_mapping_groups;
	u16				streamid_mask;
	u16				smr_mask_mask;
	struct arm_smmu_smr		*smrs;
	struct arm_smmu_s2cr		*s2crs;
	struct mutex			stream_map_mutex;

	/* Address sizes, in bits, and supported page sizes */
	unsigned long			va_size;
	unsigned long			ipa_size;
	unsigned long			pa_size;
	unsigned long			pgsize_bitmap;

	/* irqs[] holds the global IRQs first, followed by the context IRQs */
	u32				num_global_irqs;
	u32				num_context_irqs;
	unsigned int			*irqs;

	u32				cavium_id_base; /* Specific to Cavium */

	/* Held around the global TLB sync in arm_smmu_tlb_sync_global() */
	spinlock_t			global_sync_lock;

	/* IOMMU core code handle */
	struct iommu_device		iommu;
};

223 224 225 226 227
/*
 * Page table format of a context bank. NONE is the zero value, so a
 * zero-initialised arm_smmu_cfg has no format selected yet.
 */
enum arm_smmu_context_fmt {
	ARM_SMMU_CTX_FMT_NONE,
	ARM_SMMU_CTX_FMT_AARCH64,
	ARM_SMMU_CTX_FMT_AARCH32_L,
	ARM_SMMU_CTX_FMT_AARCH32_S,
};

struct arm_smmu_cfg {
	u8				cbndx;
	u8				irptndx;
233 234 235 236
	union {
		u16			asid;
		u16			vmid;
	};
237
	u32				cbar;
238
	enum arm_smmu_context_fmt	fmt;
239
};
240
#define INVALID_IRPTNDX			0xff
241

242 243 244 245
/*
 * Translation stage of a domain. S1 is explicitly zero, so it is the
 * value found in a freshly kzalloc()ed arm_smmu_domain.
 */
enum arm_smmu_domain_stage {
	ARM_SMMU_DOMAIN_S1 = 0,
	ARM_SMMU_DOMAIN_S2,
	ARM_SMMU_DOMAIN_NESTED,
	ARM_SMMU_DOMAIN_BYPASS,
};

249
struct arm_smmu_domain {
250
	struct arm_smmu_device		*smmu;
251
	struct io_pgtable_ops		*pgtbl_ops;
252
	const struct iommu_gather_ops	*tlb_ops;
253
	struct arm_smmu_cfg		cfg;
254
	enum arm_smmu_domain_stage	stage;
255
	bool				non_strict;
256
	struct mutex			init_mutex; /* Protects smmu pointer */
257
	spinlock_t			cb_lock; /* Serialises ATS1* ops and TLB syncs */
258
	struct iommu_domain		domain;
259 260
};

261 262 263 264 265
/* Associates a driver option bit with the DT property name that enables it */
struct arm_smmu_option_prop {
	u32 opt;		/* ARM_SMMU_OPT_* bit; 0 terminates the table */
	const char *prop;	/* device-tree boolean property name */
};

266 267
static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);

268 269
static bool using_legacy_binding, using_generic_binding;

270
static struct arm_smmu_option_prop arm_smmu_options[] = {
271 272 273 274
	{ ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
	{ 0, NULL},
};

275 276 277 278 279
/* Convert a generic iommu_domain into the arm_smmu_domain that embeds it. */
static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
{
	struct arm_smmu_domain *smmu_domain;

	smmu_domain = container_of(dom, struct arm_smmu_domain, domain);
	return smmu_domain;
}

280 281 282
static void parse_driver_options(struct arm_smmu_device *smmu)
{
	int i = 0;
283

284 285 286 287 288 289 290 291 292 293
	do {
		if (of_property_read_bool(smmu->dev->of_node,
						arm_smmu_options[i].prop)) {
			smmu->options |= arm_smmu_options[i].opt;
			dev_notice(smmu->dev, "option %s\n",
				arm_smmu_options[i].prop);
		}
	} while (arm_smmu_options[++i].opt);
}

294
static struct device_node *dev_get_dev_node(struct device *dev)
295 296 297
{
	if (dev_is_pci(dev)) {
		struct pci_bus *bus = to_pci_dev(dev)->bus;
298

299 300
		while (!pci_is_root_bus(bus))
			bus = bus->parent;
301
		return of_node_get(bus->bridge->parent->of_node);
302 303
	}

304
	return of_node_get(dev->of_node);
305 306
}

307
/*
 * pci_for_each_dma_alias() callback: store @alias, converted to a
 * big-endian 32-bit value, in the __be32 that @data points to. Returning
 * 0 keeps the walk going, so the value left behind is the last alias.
 */
static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
{
	__be32 *sid = data;

	*sid = cpu_to_be32(alias);
	return 0; /* Continue walking */
}

313
static int __find_legacy_master_phandle(struct device *dev, void *data)
314
{
315 316 317 318 319 320 321 322 323 324 325 326
	struct of_phandle_iterator *it = *(void **)data;
	struct device_node *np = it->node;
	int err;

	of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
			    "#stream-id-cells", 0)
		if (it->node == np) {
			*(void **)data = dev;
			return 1;
		}
	it->node = np;
	return err == -ENOENT ? 0 : err;
327 328
}

329
static struct platform_driver arm_smmu_driver;
330
static struct iommu_ops arm_smmu_ops;
331

332 333
static int arm_smmu_register_legacy_master(struct device *dev,
					   struct arm_smmu_device **smmu)
334
{
335
	struct device *smmu_dev;
336 337 338
	struct device_node *np;
	struct of_phandle_iterator it;
	void *data = &it;
339
	u32 *sids;
340 341
	__be32 pci_sid;
	int err;
342

343 344 345 346 347
	np = dev_get_dev_node(dev);
	if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
		of_node_put(np);
		return -ENODEV;
	}
348

349
	it.node = np;
350 351
	err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
				     __find_legacy_master_phandle);
352
	smmu_dev = data;
353 354 355 356 357
	of_node_put(np);
	if (err == 0)
		return -ENODEV;
	if (err < 0)
		return err;
358

359 360 361 362 363 364 365
	if (dev_is_pci(dev)) {
		/* "mmu-masters" assumes Stream ID == Requester ID */
		pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
				       &pci_sid);
		it.cur = &pci_sid;
		it.cur_count = 1;
	}
366

367 368 369 370
	err = iommu_fwspec_init(dev, &smmu_dev->of_node->fwnode,
				&arm_smmu_ops);
	if (err)
		return err;
371

372 373 374
	sids = kcalloc(it.cur_count, sizeof(*sids), GFP_KERNEL);
	if (!sids)
		return -ENOMEM;
375

376 377 378 379 380
	*smmu = dev_get_drvdata(smmu_dev);
	of_phandle_iterator_args(&it, sids, it.cur_count);
	err = iommu_fwspec_add_ids(dev, sids, it.cur_count);
	kfree(sids);
	return err;
381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401
}

/*
 * Claim the first clear bit of @map in [@start, @end).
 *
 * The search is retried if test_and_set_bit() finds that the bit was
 * already set by the time it is claimed. Returns the claimed index, or
 * -ENOSPC when no clear bit remains in the range.
 */
static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
{
	for (;;) {
		int idx = find_next_zero_bit(map, end, start);

		if (idx == end)
			return -ENOSPC;
		if (!test_and_set_bit(idx, map))
			return idx;
	}
}

/* Release index @idx, previously claimed by __arm_smmu_alloc_bitmap(). */
static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
{
	clear_bit(idx, map);
}

/* Wait for any pending TLB invalidations to complete */
402 403
static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu,
				void __iomem *sync, void __iomem *status)
404
{
405
	unsigned int spin_cnt, delay;
406

407
	writel_relaxed(0, sync);
408 409 410 411 412
	for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) {
		for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) {
			if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE))
				return;
			cpu_relax();
413
		}
414
		udelay(delay);
415
	}
416 417
	dev_err_ratelimited(smmu->dev,
			    "TLB sync timed out -- SMMU may be deadlocked\n");
418 419
}

420 421 422
static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu)
{
	void __iomem *base = ARM_SMMU_GR0(smmu);
423
	unsigned long flags;
424

425
	spin_lock_irqsave(&smmu->global_sync_lock, flags);
426 427
	__arm_smmu_tlb_sync(smmu, base + ARM_SMMU_GR0_sTLBGSYNC,
			    base + ARM_SMMU_GR0_sTLBGSTATUS);
428
	spin_unlock_irqrestore(&smmu->global_sync_lock, flags);
429 430 431
}

static void arm_smmu_tlb_sync_context(void *cookie)
432 433
{
	struct arm_smmu_domain *smmu_domain = cookie;
434 435
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx);
436
	unsigned long flags;
437

438
	spin_lock_irqsave(&smmu_domain->cb_lock, flags);
439 440
	__arm_smmu_tlb_sync(smmu, base + ARM_SMMU_CB_TLBSYNC,
			    base + ARM_SMMU_CB_TLBSTATUS);
441
	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
442 443
}

444
static void arm_smmu_tlb_sync_vmid(void *cookie)
445 446
{
	struct arm_smmu_domain *smmu_domain = cookie;
447 448

	arm_smmu_tlb_sync_global(smmu_domain->smmu);
449 450
}

451
static void arm_smmu_tlb_inv_context_s1(void *cookie)
452
{
453
	struct arm_smmu_domain *smmu_domain = cookie;
454
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
455
	void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
456

457 458 459 460 461
	/*
	 * NOTE: this is not a relaxed write; it needs to guarantee that PTEs
	 * cleared by the current CPU are visible to the SMMU before the TLBI.
	 */
	writel(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
462 463
	arm_smmu_tlb_sync_context(cookie);
}
464

465 466 467 468 469
static void arm_smmu_tlb_inv_context_s2(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	void __iomem *base = ARM_SMMU_GR0(smmu);
470

471 472
	/* NOTE: see above */
	writel(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
473
	arm_smmu_tlb_sync_global(smmu);
474 475 476
}

/*
 * Queue TLB invalidations covering @size bytes from @iova in the domain's
 * context bank, one register write per @granule, without waiting for
 * completion (the caller follows up with a sync).
 *
 * @iova:    start address of the range
 * @size:    length of the range; the loops step by @granule until @size
 *           reaches zero, so it must be a non-zero multiple of @granule
 * @granule: invalidation stride in bytes
 * @leaf:    selects the leaf-only (TLBIVAL / TLBIIPAS2L) register instead
 *           of the all-levels one
 * @cookie:  the arm_smmu_domain
 */
static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
					  size_t granule, bool leaf, void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
	void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);

	/* Order earlier page table updates before the relaxed TLBI writes */
	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
		wmb();

	if (stage1) {
		reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;

		if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
			/*
			 * The 32-bit encoding carries the ASID in the low
			 * bits of the same word, so the page offset must be
			 * stripped first. The former "iova &= ~12UL" cleared
			 * only bits 2 and 3, letting stray offset bits be
			 * ORed into the ASID field.
			 */
			iova = (iova >> 12) << 12;
			iova |= cfg->asid;
			do {
				writel_relaxed(iova, reg);
				iova += granule;
			} while (size -= granule);
		} else {
			/* 64-bit encoding: page number low, ASID from bit 48 */
			iova >>= 12;
			iova |= (u64)cfg->asid << 48;
			do {
				writeq_relaxed(iova, reg);
				iova += granule >> 12;
			} while (size -= granule);
		}
	} else {
		reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
			      ARM_SMMU_CB_S2_TLBIIPAS2;
		iova >>= 12;
		do {
			smmu_write_atomic_lq(iova, reg);
			iova += granule >> 12;
		} while (size -= granule);
	}
}

516 517 518 519 520 521 522 523 524 525 526 527
/*
 * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears
 * almost negligible, but the benefit of getting the first one in as far ahead
 * of the sync as possible is significant, hence we don't just make this a
 * no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might think.
 *
 * The range arguments are ignored: the whole VMID is invalidated each time.
 */
static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size,
					 size_t granule, bool leaf, void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	void __iomem *tlbivmid = ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_TLBIVMID;

	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
		wmb();

	writel_relaxed(smmu_domain->cfg.vmid, tlbivmid);
}

static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s1,
536
	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
537 538 539 540 541
	.tlb_sync	= arm_smmu_tlb_sync_context,
};

static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
542
	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
543 544 545 546 547 548 549
	.tlb_sync	= arm_smmu_tlb_sync_context,
};

/* TLB maintenance for stage-2 domains before SMMUv2: per-VMID, global sync */
static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context_s2,
	.tlb_add_flush	= arm_smmu_tlb_inv_vmid_nosync,
	.tlb_sync	= arm_smmu_tlb_sync_vmid,
};

552 553
static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
{
554
	u32 fsr, fsynr;
555 556
	unsigned long iova;
	struct iommu_domain *domain = dev;
557
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
558 559
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
560 561
	void __iomem *cb_base;

562
	cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
563 564 565 566 567 568
	fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);

	if (!(fsr & FSR_FAULT))
		return IRQ_NONE;

	fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
569
	iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);
570

571 572 573
	dev_err_ratelimited(smmu->dev,
	"Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
			    fsr, iova, fsynr, cfg->cbndx);
574

575 576
	writel(fsr, cb_base + ARM_SMMU_CB_FSR);
	return IRQ_HANDLED;
577 578 579 580 581 582
}

static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
{
	u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
	struct arm_smmu_device *smmu = dev;
583
	void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);
584 585 586 587 588 589

	gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
	gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
	gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
	gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);

590 591 592
	if (!gfsr)
		return IRQ_NONE;

593 594 595 596 597 598 599
	dev_err_ratelimited(smmu->dev,
		"Unexpected global fault, this could be serious\n");
	dev_err_ratelimited(smmu->dev,
		"\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
		gfsr, gfsynr0, gfsynr1, gfsynr2);

	writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
600
	return IRQ_HANDLED;
601 602
}

603 604
static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
				       struct io_pgtable_cfg *pgtbl_cfg)
605
{
606
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659
	struct arm_smmu_cb *cb = &smmu_domain->smmu->cbs[cfg->cbndx];
	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;

	cb->cfg = cfg;

	/* TTBCR */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			cb->tcr[0] = pgtbl_cfg->arm_v7s_cfg.tcr;
		} else {
			cb->tcr[0] = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
			cb->tcr[1] = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
			cb->tcr[1] |= TTBCR2_SEP_UPSTREAM;
			if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
				cb->tcr[1] |= TTBCR2_AS;
		}
	} else {
		cb->tcr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
	}

	/* TTBRs */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			cb->ttbr[0] = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
			cb->ttbr[1] = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
		} else {
			cb->ttbr[0] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
			cb->ttbr[0] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
			cb->ttbr[1] = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
			cb->ttbr[1] |= (u64)cfg->asid << TTBRn_ASID_SHIFT;
		}
	} else {
		cb->ttbr[0] = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
	}

	/* MAIRs (stage-1 only) */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			cb->mair[0] = pgtbl_cfg->arm_v7s_cfg.prrr;
			cb->mair[1] = pgtbl_cfg->arm_v7s_cfg.nmrr;
		} else {
			cb->mair[0] = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
			cb->mair[1] = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
		}
	}
}

static void arm_smmu_write_context_bank(struct arm_smmu_device *smmu, int idx)
{
	u32 reg;
	bool stage1;
	struct arm_smmu_cb *cb = &smmu->cbs[idx];
	struct arm_smmu_cfg *cfg = cb->cfg;
660
	void __iomem *cb_base, *gr1_base;
661

662 663 664 665 666 667 668 669
	cb_base = ARM_SMMU_CB(smmu, idx);

	/* Unassigned context banks only need disabling */
	if (!cfg) {
		writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
		return;
	}

670
	gr1_base = ARM_SMMU_GR1(smmu);
671
	stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
672

673
	/* CBA2R */
674
	if (smmu->version > ARM_SMMU_V1) {
675 676 677 678
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
			reg = CBA2R_RW64_64BIT;
		else
			reg = CBA2R_RW64_32BIT;
679 680
		/* 16-bit VMIDs live in CBA2R */
		if (smmu->features & ARM_SMMU_FEAT_VMID16)
681
			reg |= cfg->vmid << CBA2R_VMID_SHIFT;
682

683
		writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(idx));
684 685
	}

686
	/* CBAR */
687
	reg = cfg->cbar;
688
	if (smmu->version < ARM_SMMU_V2)
689
		reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT;
690

691 692 693 694 695 696 697
	/*
	 * Use the weakest shareability/memory types, so they are
	 * overridden by the ttbcr/pte.
	 */
	if (stage1) {
		reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) |
			(CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
698 699
	} else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
		/* 8-bit VMIDs live in CBAR */
700
		reg |= cfg->vmid << CBAR_VMID_SHIFT;
701
	}
702
	writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(idx));
703

704 705 706 707 708
	/*
	 * TTBCR
	 * We must write this before the TTBRs, since it determines the
	 * access behaviour of some fields (in particular, ASID[15:8]).
	 */
709 710 711
	if (stage1 && smmu->version > ARM_SMMU_V1)
		writel_relaxed(cb->tcr[1], cb_base + ARM_SMMU_CB_TTBCR2);
	writel_relaxed(cb->tcr[0], cb_base + ARM_SMMU_CB_TTBCR);
712

713
	/* TTBRs */
714 715 716 717
	if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
		writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
		writel_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
		writel_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
718
	} else {
719 720 721
		writeq_relaxed(cb->ttbr[0], cb_base + ARM_SMMU_CB_TTBR0);
		if (stage1)
			writeq_relaxed(cb->ttbr[1], cb_base + ARM_SMMU_CB_TTBR1);
722
	}
723

724
	/* MAIRs (stage-1 only) */
725
	if (stage1) {
726 727
		writel_relaxed(cb->mair[0], cb_base + ARM_SMMU_CB_S1_MAIR0);
		writel_relaxed(cb->mair[1], cb_base + ARM_SMMU_CB_S1_MAIR1);
728 729 730
	}

	/* SCTLR */
731
	reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
732 733
	if (stage1)
		reg |= SCTLR_S1_ASIDPNE;
734 735 736
	if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN))
		reg |= SCTLR_E;

737
	writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
738 739 740
}

static int arm_smmu_init_domain_context(struct iommu_domain *domain,
741
					struct arm_smmu_device *smmu)
742
{
743
	int irq, start, ret = 0;
744 745 746 747
	unsigned long ias, oas;
	struct io_pgtable_ops *pgtbl_ops;
	struct io_pgtable_cfg pgtbl_cfg;
	enum io_pgtable_fmt fmt;
748
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
749
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
750

751
	mutex_lock(&smmu_domain->init_mutex);
752 753 754
	if (smmu_domain->smmu)
		goto out_unlock;

755 756 757 758 759 760
	if (domain->type == IOMMU_DOMAIN_IDENTITY) {
		smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS;
		smmu_domain->smmu = smmu;
		goto out_unlock;
	}

761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783
	/*
	 * Mapping the requested stage onto what we support is surprisingly
	 * complicated, mainly because the spec allows S1+S2 SMMUs without
	 * support for nested translation. That means we end up with the
	 * following table:
	 *
	 * Requested        Supported        Actual
	 *     S1               N              S1
	 *     S1             S1+S2            S1
	 *     S1               S2             S2
	 *     S1               S1             S1
	 *     N                N              N
	 *     N              S1+S2            S2
	 *     N                S2             S2
	 *     N                S1             S1
	 *
	 * Note that you can't actually request stage-2 mappings.
	 */
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;

784 785 786 787 788 789 790 791 792 793
	/*
	 * Choosing a suitable context format is even more fiddly. Until we
	 * grow some way for the caller to express a preference, and/or move
	 * the decision into the io-pgtable code where it arguably belongs,
	 * just aim for the closest thing to the rest of the system, and hope
	 * that the hardware isn't esoteric enough that we can't assume AArch64
	 * support to be a superset of AArch32 support...
	 */
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
794 795 796 797 798
	if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
	    !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
	    (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
	    (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
799 800 801 802 803 804 805 806 807 808 809
	if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
	    (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
			       ARM_SMMU_FEAT_FMT_AARCH64_16K |
			       ARM_SMMU_FEAT_FMT_AARCH64_4K)))
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;

	if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
		ret = -EINVAL;
		goto out_unlock;
	}

810 811 812 813
	switch (smmu_domain->stage) {
	case ARM_SMMU_DOMAIN_S1:
		cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
		start = smmu->num_s2_context_banks;
814 815
		ias = smmu->va_size;
		oas = smmu->ipa_size;
816
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
817
			fmt = ARM_64_LPAE_S1;
818
		} else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
819
			fmt = ARM_32_LPAE_S1;
820 821
			ias = min(ias, 32UL);
			oas = min(oas, 40UL);
822 823 824 825
		} else {
			fmt = ARM_V7S;
			ias = min(ias, 32UL);
			oas = min(oas, 32UL);
826
		}
827
		smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops;
828 829
		break;
	case ARM_SMMU_DOMAIN_NESTED:
830 831 832 833
		/*
		 * We will likely want to change this if/when KVM gets
		 * involved.
		 */
834
	case ARM_SMMU_DOMAIN_S2:
835 836
		cfg->cbar = CBAR_TYPE_S2_TRANS;
		start = 0;
837 838
		ias = smmu->ipa_size;
		oas = smmu->pa_size;
839
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
840
			fmt = ARM_64_LPAE_S2;
841
		} else {
842
			fmt = ARM_32_LPAE_S2;
843 844 845
			ias = min(ias, 40UL);
			oas = min(oas, 40UL);
		}
846
		if (smmu->version == ARM_SMMU_V2)
847
			smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2;
848
		else
849
			smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1;
850 851 852 853
		break;
	default:
		ret = -EINVAL;
		goto out_unlock;
854 855 856
	}
	ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
				      smmu->num_context_banks);
857
	if (ret < 0)
858
		goto out_unlock;
859

860
	cfg->cbndx = ret;
861
	if (smmu->version < ARM_SMMU_V2) {
862 863
		cfg->irptndx = atomic_inc_return(&smmu->irptndx);
		cfg->irptndx %= smmu->num_context_irqs;
864
	} else {
865
		cfg->irptndx = cfg->cbndx;
866 867
	}

868 869 870 871 872
	if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2)
		cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base;
	else
		cfg->asid = cfg->cbndx + smmu->cavium_id_base;

873
	pgtbl_cfg = (struct io_pgtable_cfg) {
874
		.pgsize_bitmap	= smmu->pgsize_bitmap,
875 876
		.ias		= ias,
		.oas		= oas,
877
		.tlb		= smmu_domain->tlb_ops,
878
		.iommu_dev	= smmu->dev,
879 880
	};

881 882 883
	if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
		pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;

884 885 886
	if (smmu_domain->non_strict)
		pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;

887 888 889 890 891 892 893
	smmu_domain->smmu = smmu;
	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
	if (!pgtbl_ops) {
		ret = -ENOMEM;
		goto out_clear_smmu;
	}

894 895
	/* Update the domain's page sizes to reflect the page table format */
	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
896 897
	domain->geometry.aperture_end = (1UL << ias) - 1;
	domain->geometry.force_aperture = true;
898

899 900
	/* Initialise the context bank with our page table cfg */
	arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);
901
	arm_smmu_write_context_bank(smmu, cfg->cbndx);
902 903 904 905 906

	/*
	 * Request context fault interrupt. Do this last to avoid the
	 * handler seeing a half-initialised domain state.
	 */
907
	irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
908 909
	ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
			       IRQF_SHARED, "arm-smmu-context-fault", domain);
910
	if (ret < 0) {
911
		dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
912 913
			cfg->irptndx, irq);
		cfg->irptndx = INVALID_IRPTNDX;
914 915
	}

916 917 918 919
	mutex_unlock(&smmu_domain->init_mutex);

	/* Publish page table ops for map/unmap */
	smmu_domain->pgtbl_ops = pgtbl_ops;
920
	return 0;
921

922 923
out_clear_smmu:
	smmu_domain->smmu = NULL;
924
out_unlock:
925
	mutex_unlock(&smmu_domain->init_mutex);
926 927 928 929 930
	return ret;
}

static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
{
931
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
932 933
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
934 935
	int irq;

936
	if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY)
937 938
		return;

939 940 941 942
	/*
	 * Disable the context bank and free the page tables before freeing
	 * it.
	 */
943 944
	smmu->cbs[cfg->cbndx].cfg = NULL;
	arm_smmu_write_context_bank(smmu, cfg->cbndx);
945

946 947
	if (cfg->irptndx != INVALID_IRPTNDX) {
		irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
948
		devm_free_irq(smmu->dev, irq, domain);
949 950
	}

951
	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
952
	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
953 954
}

955
static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
956 957 958
{
	struct arm_smmu_domain *smmu_domain;

959 960 961
	if (type != IOMMU_DOMAIN_UNMANAGED &&
	    type != IOMMU_DOMAIN_DMA &&
	    type != IOMMU_DOMAIN_IDENTITY)
962
		return NULL;
963 964 965 966 967 968 969
	/*
	 * Allocate the domain and initialise some of its data structures.
	 * We can't really do anything meaningful until we've added a
	 * master.
	 */
	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
	if (!smmu_domain)
970
		return NULL;
971

972 973
	if (type == IOMMU_DOMAIN_DMA && (using_legacy_binding ||
	    iommu_get_dma_cookie(&smmu_domain->domain))) {
974 975 976 977
		kfree(smmu_domain);
		return NULL;
	}

978
	mutex_init(&smmu_domain->init_mutex);
979
	spin_lock_init(&smmu_domain->cb_lock);
980 981

	return &smmu_domain->domain;
982 983
}

984
/*
 * Free the domain resources. We assume that all devices have already
 * been detached. Order: DMA cookie, context bank state, then the
 * arm_smmu_domain allocation itself.
 */
static void arm_smmu_domain_free(struct iommu_domain *domain)
{
	struct arm_smmu_domain *owner = to_smmu_domain(domain);

	iommu_put_dma_cookie(domain);
	arm_smmu_destroy_domain_context(domain);
	kfree(owner);
}

997 998 999
static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
{
	struct arm_smmu_smr *smr = smmu->smrs + idx;
1000
	u32 reg = smr->id << SMR_ID_SHIFT | smr->mask << SMR_MASK_SHIFT;
1001

1002
	if (!(smmu->features & ARM_SMMU_FEAT_EXIDS) && smr->valid)
1003 1004 1005 1006
		reg |= SMR_VALID;
	writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
}

1007 1008 1009 1010 1011 1012 1013
static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
{
	struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
	u32 reg = (s2cr->type & S2CR_TYPE_MASK) << S2CR_TYPE_SHIFT |
		  (s2cr->cbndx & S2CR_CBNDX_MASK) << S2CR_CBNDX_SHIFT |
		  (s2cr->privcfg & S2CR_PRIVCFG_MASK) << S2CR_PRIVCFG_SHIFT;

1014 1015 1016
	if (smmu->features & ARM_SMMU_FEAT_EXIDS && smmu->smrs &&
	    smmu->smrs[idx].valid)
		reg |= S2CR_EXIDVALID;
1017 1018 1019 1020 1021 1022 1023 1024 1025 1026
	writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
}

/*
 * Program stream map entry @idx: the S2CR always, followed by the SMR when
 * the SMMU has an SMR array.
 */
static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
{
	arm_smmu_write_s2cr(smmu, idx);

	if (!smmu->smrs)
		return;

	arm_smmu_write_smr(smmu, idx);
}

1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054
/*
 * The width of SMR's mask field depends on sCR0_EXIDENABLE, so this function
 * should be called after sCR0 is written.
 */
static void arm_smmu_test_smr_masks(struct arm_smmu_device *smmu)
{
	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
	u32 smr;

	if (!smmu->smrs)
		return;

	/*
	 * SMR.ID bits may not be preserved if the corresponding MASK
	 * bits are set, so check each one separately. We can reject
	 * masters later if they try to claim IDs outside these masks.
	 */
	smr = smmu->streamid_mask << SMR_ID_SHIFT;
	writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
	smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
	smmu->streamid_mask = smr >> SMR_ID_SHIFT;

	smr = smmu->streamid_mask << SMR_MASK_SHIFT;
	writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
	smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
	smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;
}

1055
/*
 * Pick the stream map entry to use for stream @id with match @mask.
 *
 * Returns @id itself when there is no SMR array (stream indexing), the index
 * of an existing entry that fully covers the request, the index of the first
 * invalid (free) entry otherwise, -EINVAL when the request partially
 * overlaps an existing entry, or -ENOSPC when no entry is free.
 */
static int arm_smmu_find_sme(struct arm_smmu_device *smmu, u16 id, u16 mask)
{
	struct arm_smmu_smr *smrs = smmu->smrs;
	int n, first_free = -ENOSPC;

	/* Stream indexing is blissfully easy */
	if (!smrs)
		return id;

	/* Validating SMRs is... less so */
	for (n = 0; n < smmu->num_mapping_groups; ++n) {
		const struct arm_smmu_smr *smr = &smrs[n];

		if (!smr->valid) {
			/*
			 * Remember only the first free entry we meet; it is
			 * claimed at the end if nothing else matches.
			 */
			if (first_free < 0)
				first_free = n;
			continue;
		}

		/*
		 * If the new entry is _entirely_ matched by an existing entry,
		 * then reuse that, with the guarantee that there also cannot
		 * be any subsequent conflicting entries. In normal use we'd
		 * expect simply identical entries for this case, but there's
		 * no harm in accommodating the generalisation.
		 */
		if ((mask & smr->mask) == mask &&
		    !((id ^ smr->id) & ~smr->mask))
			return n;

		/*
		 * If the new entry has any other overlap with an existing one,
		 * though, then there always exists at least one stream ID
		 * which would cause a conflict, and we can't allow that risk.
		 */
		if (!((id ^ smr->id) & ~(smr->mask | mask)))
			return -EINVAL;
	}

	return first_free;
}

static bool arm_smmu_free_sme(struct arm_smmu_device *smmu, int idx)
{
	if (--smmu->s2crs[idx].count)
		return false;

	smmu->s2crs[idx] = s2cr_init_val;
	if (smmu->smrs)
		smmu->smrs[idx].valid = false;

	return true;
}

static int arm_smmu_master_alloc_smes(struct device *dev)
{
1111
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1112
	struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1113 1114 1115 1116 1117 1118 1119
	struct arm_smmu_device *smmu = cfg->smmu;
	struct arm_smmu_smr *smrs = smmu->smrs;
	struct iommu_group *group;
	int i, idx, ret;

	mutex_lock(&smmu->stream_map_mutex);
	/* Figure out a viable stream map entry allocation */
1120
	for_each_cfg_sme(fwspec, i, idx) {
1121 1122 1123
		u16 sid = fwspec->ids[i];
		u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;

1124 1125 1126
		if (idx != INVALID_SMENDX) {
			ret = -EEXIST;
			goto out_err;
1127 1128
		}

1129
		ret = arm_smmu_find_sme(smmu, sid, mask);
1130 1131 1132 1133 1134
		if (ret < 0)
			goto out_err;

		idx = ret;
		if (smrs && smmu->s2crs[idx].count == 0) {
1135 1136
			smrs[idx].id = sid;
			smrs[idx].mask = mask;
1137 1138 1139 1140
			smrs[idx].valid = true;
		}
		smmu->s2crs[idx].count++;
		cfg->smendx[i] = (s16)idx;
1141 1142
	}

1143 1144 1145 1146 1147 1148 1149 1150
	group = iommu_group_get_for_dev(dev);
	if (!group)
		group = ERR_PTR(-ENOMEM);
	if (IS_ERR(group)) {
		ret = PTR_ERR(group);
		goto out_err;
	}
	iommu_group_put(group);
1151

1152
	/* It worked! Now, poke the actual hardware */
1153
	for_each_cfg_sme(fwspec, i, idx) {
1154 1155 1156
		arm_smmu_write_sme(smmu, idx);
		smmu->s2crs[idx].group = group;
	}
1157

1158
	mutex_unlock(&smmu->stream_map_mutex);
1159 1160
	return 0;

1161
out_err:
1162
	while (i--) {
1163
		arm_smmu_free_sme(smmu, cfg->smendx[i]);
1164 1165
		cfg->smendx[i] = INVALID_SMENDX;
	}
1166 1167
	mutex_unlock(&smmu->stream_map_mutex);
	return ret;
1168 1169
}

1170
static void arm_smmu_master_free_smes(struct iommu_fwspec *fwspec)
1171
{
1172 1173
	struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
	struct arm_smmu_master_cfg *cfg = fwspec->iommu_priv;
1174
	int i, idx;
1175

1176
	mutex_lock(&smmu->stream_map_mutex);
1177
	for_each_cfg_sme(fwspec, i, idx) {
1178 1179
		if (arm_smmu_free_sme(smmu, idx))
			arm_smmu_write_sme(smmu, idx);
1180
		cfg->smendx[i] = INVALID_SMENDX;
1181
	}
1182
	mutex_unlock(&smmu->stream_map_mutex);
1183 1184 1185
}

static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
1186
				      struct iommu_fwspec *fwspec)
1187
{
1188
	struct arm_smmu_device *smmu = smmu_domain->smmu;
1189 1190
	struct arm_smmu_s2cr *s2cr = smmu->s2crs;
	u8 cbndx = smmu_domain->cfg.cbndx;
1191
	enum arm_smmu_s2cr_type type;
1192
	int i, idx;
1193

1194 1195 1196 1197 1198
	if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS)
		type = S2CR_TYPE_BYPASS;
	else
		type = S2CR_TYPE_TRANS;

1199
	for_each_cfg_sme(fwspec, i, idx) {
1200
		if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
1201
			continue;
1202

1203
		s2cr[idx].type = type;
1204
		s2cr[idx].privcfg = S2CR_PRIVCFG_DEFAULT;
1205 1206
		s2cr[idx].cbndx = cbndx;
		arm_smmu_write_s2cr(smmu, idx);
1207
	}
1208
	return 0;
1209 1210
}

1211 1212
static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
1213
	int ret;
1214
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1215
	struct arm_smmu_device *smmu;
1216
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1217

1218
	if (!fwspec || fwspec->ops != &arm_smmu_ops) {
1219 1220 1221 1222
		dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
		return -ENXIO;
	}

1223 1224 1225 1226 1227 1228 1229 1230 1231 1232
	/*
	 * FIXME: The arch/arm DMA API code tries to attach devices to its own
	 * domains between of_xlate() and add_device() - we have no way to cope
	 * with that, so until ARM gets converted to rely on groups and default
	 * domains, just say no (but more politely than by dereferencing NULL).
	 * This should be at least a WARN_ON once that's sorted.
	 */
	if (!fwspec->iommu_priv)
		return -ENODEV;

1233
	smmu = fwspec_smmu(fwspec);
1234
	/* Ensure that the domain is finalised */
1235
	ret = arm_smmu_init_domain_context(domain, smmu);
1236
	if (ret < 0)
1237 1238
		return ret;

1239
	/*
1240 1241
	 * Sanity check the domain. We don't support domains across
	 * different SMMUs.
1242
	 */
1243
	if (smmu_domain->smmu != smmu) {
1244 1245
		dev_err(dev,
			"cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
1246
			dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
1247
		return -EINVAL;
1248 1249 1250
	}

	/* Looks ok, so add the device to the domain */
1251
	return arm_smmu_domain_add_master(smmu_domain, fwspec);
1252 1253 1254
}

static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
1255
			phys_addr_t paddr, size_t size, int prot)
1256
{
1257
	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;
1258

1259
	if (!ops)
1260 1261
		return -ENODEV;

1262
	return ops->map(ops, iova, paddr, size, prot);
1263 1264 1265 1266 1267
}

/*
 * Unmap @size bytes at @iova through the domain's io-pgtable ops and return
 * what the ops report. Returns 0 when the domain has no page-table ops.
 */
static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
			     size_t size)
{
	struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops;

	return ops ? ops->unmap(ops, iova, size) : 0;
}

1276 1277 1278 1279 1280 1281 1282 1283
/* Invoke the domain's tlb_flush_all callback, if TLB ops have been set. */
static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	if (!smmu_domain->tlb_ops)
		return;

	smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
}

1284 1285 1286 1287 1288 1289 1290 1291
/* Invoke the domain's tlb_sync callback, if TLB ops have been set. */
static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	if (!smmu_domain->tlb_ops)
		return;

	smmu_domain->tlb_ops->tlb_sync(smmu_domain);
}

1292 1293 1294
/*
 * Translate @iova using the context bank's ATS1PR/ATSR/PAR registers, with
 * the register sequence serialised by the domain's cb_lock.
 *
 * If ATSR stays active past the poll timeout, the lookup falls back to the
 * software page-table walk. Returns 0 when PAR reports a fault.
 */
static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
					      dma_addr_t iova)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
	struct device *dev = smmu->dev;
	void __iomem *cb_base = ARM_SMMU_CB(smmu, cfg->cbndx);
	unsigned long va, flags;
	u32 atsr;
	u64 par;

	spin_lock_irqsave(&smmu_domain->cb_lock, flags);

	/* ATS1 registers can only be written atomically */
	va = iova & ~0xfffUL;
	if (smmu->version == ARM_SMMU_V2)
		smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
	else /* Register is only 32-bit in v1 */
		writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);

	if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, atsr,
				      !(atsr & ATSR_ACTIVE), 5, 50)) {
		spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);
		dev_err(dev,
			"iova to phys timed out on %pad. Falling back to software table walk.\n",
			&iova);
		return ops->iova_to_phys(ops, iova);
	}

	par = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
	spin_unlock_irqrestore(&smmu_domain->cb_lock, flags);

	if (par & CB_PAR_F) {
		dev_err(dev, "translation fault!\n");
		dev_err(dev, "PAR = 0x%llx\n", par);
		return 0;
	}

	/* PAR bits [39:12] supply the page; the low 12 bits come from @iova */
	return (par & GENMASK_ULL(39, 12)) | (iova & 0xfff);
}

1335
static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
1336
					dma_addr_t iova)
1337
{
1338
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1339
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
1340

1341 1342 1343
	if (domain->type == IOMMU_DOMAIN_IDENTITY)
		return iova;

1344
	if (!ops)
1345
		return 0;
1346

1347
	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
1348 1349
			smmu_domain->stage == ARM_SMMU_DOMAIN_S1)
		return arm_smmu_iova_to_phys_hard(domain, iova);
1350

1351
	return ops->iova_to_phys(ops, iova);
1352 1353
}

1354
static bool arm_smmu_capable(enum iommu_cap cap)
1355
{
1356 1357
	switch (cap) {
	case IOMMU_CAP_CACHE_COHERENCY:
1358 1359 1360 1361 1362
		/*
		 * Return true here as the SMMU can always send out coherent
		 * requests.
		 */
		return true;
1363 1364
	case IOMMU_CAP_NOEXEC:
		return true;
1365
	default:
1366
		return false;
1367
	}
1368 1369
}

1370 1371
static int arm_smmu_match_node(struct device *dev, void *data)
{
1372
	return dev->fwnode == data;
1373 1374
}

1375 1376
static
struct arm_smmu_device *arm_smmu_get_by_fwnode(struct fwnode_handle *fwnode)
1377 1378
{
	struct device *dev = driver_find_device(&arm_smmu_driver.driver, NULL,
1379
						fwnode, arm_smmu_match_node);
1380 1381 1382 1383
	put_device(dev);
	return dev ? dev_get_drvdata(dev) : NULL;
}

1384
static int arm_smmu_add_device(struct device *dev)
1385
{
1386
	struct arm_smmu_device *smmu;
1387
	struct arm_smmu_master_cfg *cfg;
1388
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1389
	int i, ret;
1390

1391 1392
	if (using_legacy_binding) {
		ret = arm_smmu_register_legacy_master(dev, &smmu);
1393 1394 1395 1396 1397 1398

		/*
		 * If dev->iommu_fwspec is initally NULL, arm_smmu_register_legacy_master()
		 * will allocate/initialise a new one. Thus we need to update fwspec for
		 * later use.
		 */
1399
		fwspec = dev_iommu_fwspec_get(dev);
1400 1401
		if (ret)
			goto out_free;
1402
	} else if (fwspec && fwspec->ops == &arm_smmu_ops) {
1403
		smmu = arm_smmu_get_by_fwnode(fwspec->iommu_fwnode);
1404 1405 1406
	} else {
		return -ENODEV;
	}
1407

1408
	ret = -EINVAL;
1409 1410
	for (i = 0; i < fwspec->num_ids; i++) {
		u16 sid = fwspec->ids[i];
1411
		u16 mask = fwspec->ids[i] >> SMR_MASK_SHIFT;
1412

1413
		if (sid & ~smmu->streamid_mask) {
1414
			dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
1415 1416 1417 1418 1419
				sid, smmu->streamid_mask);
			goto out_free;
		}
		if (mask & ~smmu->smr_mask_mask) {
			dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n",
P
Peng Fan 已提交
1420
				mask, smmu->smr_mask_mask);
1421 1422
			goto out_free;
		}
1423
	}
1424

1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435
	ret = -ENOMEM;
	cfg = kzalloc(offsetof(struct arm_smmu_master_cfg, smendx[i]),
		      GFP_KERNEL);
	if (!cfg)
		goto out_free;

	cfg->smmu = smmu;
	fwspec->iommu_priv = cfg;
	while (i--)
		cfg->smendx[i] = INVALID_SMENDX;

1436
	ret = arm_smmu_master_alloc_smes(dev);
1437
	if (ret)
1438
		goto out_cfg_free;
1439

1440 1441
	iommu_device_link(&smmu->iommu, dev);

1442
	return 0;
1443

1444 1445
out_cfg_free:
	kfree(cfg);
1446
out_free:
1447
	iommu_fwspec_free(dev);
1448
	return ret;
1449 1450
}

1451 1452
static void arm_smmu_remove_device(struct device *dev)
{
1453
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1454 1455 1456
	struct arm_smmu_master_cfg *cfg;
	struct arm_smmu_device *smmu;

1457

1458
	if (!fwspec || fwspec->ops != &arm_smmu_ops)
1459
		return;
1460

1461 1462 1463 1464
	cfg  = fwspec->iommu_priv;
	smmu = cfg->smmu;

	iommu_device_unlink(&smmu->iommu, dev);
1465
	arm_smmu_master_free_smes(fwspec);
1466
	iommu_group_remove_device(dev);
1467 1468
	kfree(fwspec->iommu_priv);
	iommu_fwspec_free(dev);
1469 1470
}

1471 1472
static struct iommu_group *arm_smmu_device_group(struct device *dev)
{
1473
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
1474
	struct arm_smmu_device *smmu = fwspec_smmu(fwspec);
1475 1476 1477
	struct iommu_group *group = NULL;
	int i, idx;

1478
	for_each_cfg_sme(fwspec, i, idx) {
1479 1480 1481 1482 1483 1484 1485 1486
		if (group && smmu->s2crs[idx].group &&
		    group != smmu->s2crs[idx].group)
			return ERR_PTR(-EINVAL);

		group = smmu->s2crs[idx].group;
	}

	if (group)
1487
		return iommu_group_ref_get(group);
1488 1489 1490

	if (dev_is_pci(dev))
		group = pci_device_group(dev);
1491 1492
	else if (dev_is_fsl_mc(dev))
		group = fsl_mc_device_group(dev);
1493 1494 1495 1496 1497 1498
	else
		group = generic_device_group(dev);

	return group;
}

1499 1500 1501
static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
				    enum iommu_attr attr, void *data)
{
1502
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1503

1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522
	switch(domain->type) {
	case IOMMU_DOMAIN_UNMANAGED:
		switch (attr) {
		case DOMAIN_ATTR_NESTING:
			*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
			return 0;
		default:
			return -ENODEV;
		}
		break;
	case IOMMU_DOMAIN_DMA:
		switch (attr) {
		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
			*(int *)data = smmu_domain->non_strict;
			return 0;
		default:
			return -ENODEV;
		}
		break;
1523
	default:
1524
		return -EINVAL;
1525 1526 1527 1528 1529 1530
	}
}

static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
				    enum iommu_attr attr, void *data)
{
1531
	int ret = 0;
1532
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
1533

1534 1535
	mutex_lock(&smmu_domain->init_mutex);

1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560
	switch(domain->type) {
	case IOMMU_DOMAIN_UNMANAGED:
		switch (attr) {
		case DOMAIN_ATTR_NESTING:
			if (smmu_domain->smmu) {
				ret = -EPERM;
				goto out_unlock;
			}

			if (*(int *)data)
				smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
			else
				smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
			break;
		default:
			ret = -ENODEV;
		}
		break;
	case IOMMU_DOMAIN_DMA:
		switch (attr) {
		case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
			smmu_domain->non_strict = *(int *)data;
			break;
		default:
			ret = -ENODEV;
1561 1562
		}
		break;
1563
	default:
1564
		ret = -EINVAL;
1565
	}
1566 1567 1568
out_unlock:
	mutex_unlock(&smmu_domain->init_mutex);
	return ret;
1569 1570
}

1571 1572
static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
{
1573
	u32 mask, fwid = 0;
1574 1575 1576 1577 1578 1579

	if (args->args_count > 0)
		fwid |= (u16)args->args[0];

	if (args->args_count > 1)
		fwid |= (u16)args->args[1] << SMR_MASK_SHIFT;
1580 1581
	else if (!of_property_read_u32(args->np, "stream-match-mask", &mask))
		fwid |= (u16)mask << SMR_MASK_SHIFT;
1582 1583 1584 1585

	return iommu_fwspec_add_ids(dev, &fwid, 1);
}

1586 1587 1588 1589 1590 1591 1592
static void arm_smmu_get_resv_regions(struct device *dev,
				      struct list_head *head)
{
	struct iommu_resv_region *region;
	int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO;

	region = iommu_alloc_resv_region(MSI_IOVA_BASE, MSI_IOVA_LENGTH,
1593
					 prot, IOMMU_RESV_SW_MSI);
1594 1595 1596 1597
	if (!region)
		return;

	list_add_tail(&region->list, head);
1598 1599

	iommu_dma_get_resv_regions(dev, head);
1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610
}

/* Free every reserved-region entry on @head. */
static void arm_smmu_put_resv_regions(struct device *dev,
				      struct list_head *head)
{
	struct iommu_resv_region *region, *tmp;

	/* The _safe iterator is needed because each entry is freed in turn */
	list_for_each_entry_safe(region, tmp, head, list)
		kfree(region);
}

1611
static struct iommu_ops arm_smmu_ops = {
1612
	.capable		= arm_smmu_capable,
1613 1614
	.domain_alloc		= arm_smmu_domain_alloc,
	.domain_free		= arm_smmu_domain_free,
1615 1616 1617
	.attach_dev		= arm_smmu_attach_dev,
	.map			= arm_smmu_map,
	.unmap			= arm_smmu_unmap,
1618
	.flush_iotlb_all	= arm_smmu_flush_iotlb_all,
1619
	.iotlb_sync		= arm_smmu_iotlb_sync,
1620 1621 1622
	.iova_to_phys		= arm_smmu_iova_to_phys,
	.add_device		= arm_smmu_add_device,
	.remove_device		= arm_smmu_remove_device,
1623
	.device_group		= arm_smmu_device_group,
1624 1625
	.domain_get_attr	= arm_smmu_domain_get_attr,
	.domain_set_attr	= arm_smmu_domain_set_attr,
1626
	.of_xlate		= arm_smmu_of_xlate,
1627 1628
	.get_resv_regions	= arm_smmu_get_resv_regions,
	.put_resv_regions	= arm_smmu_put_resv_regions,
1629
	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
1630 1631 1632 1633 1634
};

static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
{
	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
1635
	int i;
1636
	u32 reg, major;
1637

1638 1639 1640
	/* clear global FSR */
	reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
	writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
1641

1642 1643 1644 1645
	/*
	 * Reset stream mapping groups: Initial values mark all SMRn as
	 * invalid and all S2CRn as bypass unless overridden.
	 */
1646 1647
	for (i = 0; i < smmu->num_mapping_groups; ++i)
		arm_smmu_write_sme(smmu, i);
1648

1649 1650 1651 1652 1653 1654 1655 1656
	if (smmu->model == ARM_MMU500) {
		/*
		 * Before clearing ARM_MMU500_ACTLR_CPRE, need to
		 * clear CACHE_LOCK bit of ACR first. And, CACHE_LOCK
		 * bit is only present in MMU-500r2 onwards.
		 */
		reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
		major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
1657
		reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
1658 1659 1660 1661 1662 1663
		if (major >= 2)
			reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
		/*
		 * Allow unmatched Stream IDs to allocate bypass
		 * TLB entries for reduced latency.
		 */
1664
		reg |= ARM_MMU500_ACR_SMTNMB_TLBEN | ARM_MMU500_ACR_S2CRB_TLBEN;
1665 1666 1667
		writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
	}

1668 1669
	/* Make sure all context banks are disabled and clear CB_FSR  */
	for (i = 0; i < smmu->num_context_banks; ++i) {
1670 1671 1672
		void __iomem *cb_base = ARM_SMMU_CB(smmu, i);

		arm_smmu_write_context_bank(smmu, i);
1673
		writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
1674 1675 1676 1677 1678 1679 1680 1681 1682
		/*
		 * Disable MMU-500's not-particularly-beneficial next-page
		 * prefetcher for the sake of errata #841119 and #826419.
		 */
		if (smmu->model == ARM_MMU500) {
			reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
			reg &= ~ARM_MMU500_ACTLR_CPRE;
			writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
		}
1683
	}
1684

1685 1686 1687 1688
	/* Invalidate the TLB, just in case */
	writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH);
	writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);

1689
	reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1690

1691
	/* Enable fault reporting */
1692
	reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);
1693 1694

	/* Disable TLB broadcasting. */
1695
	reg |= (sCR0_VMIDPNE | sCR0_PTM);
1696

1697 1698 1699 1700 1701 1702
	/* Enable client access, handling unmatched streams as appropriate */
	reg &= ~sCR0_CLIENTPD;
	if (disable_bypass)
		reg |= sCR0_USFCFG;
	else
		reg &= ~sCR0_USFCFG;
1703 1704

	/* Disable forced broadcasting */
1705
	reg &= ~sCR0_FB;
1706 1707

	/* Don't upgrade barriers */
1708
	reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);
1709

1710 1711 1712
	if (smmu->features & ARM_SMMU_FEAT_VMID16)
		reg |= sCR0_VMID16EN;

1713 1714 1715
	if (smmu->features & ARM_SMMU_FEAT_EXIDS)
		reg |= sCR0_EXIDENABLE;

1716
	/* Push the button */
1717
	arm_smmu_tlb_sync_global(smmu);
1718
	writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744
}

/*
 * Convert an address-size field encoding from the ID registers into a width
 * in bits. Encodings 0..4 map to 32, 36, 40, 42 and 44 bits; encoding 5 and
 * every other value map to 48 bits.
 */
static int arm_smmu_id_size_to_bits(int size)
{
	static const int bits[] = { 32, 36, 40, 42, 44 };

	if (size < 0 || size >= (int)(sizeof(bits) / sizeof(bits[0])))
		return 48;

	return bits[size];
}

static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
{
	unsigned long size;
	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
	u32 id;
1745
	bool cttw_reg, cttw_fw = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK;
1746
	int i;
1747 1748

	dev_notice(smmu->dev, "probing hardware configuration...\n");
1749 1750
	dev_notice(smmu->dev, "SMMUv%d with:\n",
			smmu->version == ARM_SMMU_V2 ? 2 : 1);
1751 1752 1753

	/* ID0 */
	id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);
1754 1755 1756 1757 1758 1759 1760

	/* Restrict available stages based on module parameter */
	if (force_stage == 1)
		id &= ~(ID0_S2TS | ID0_NTS);
	else if (force_stage == 2)
		id &= ~(ID0_S1TS | ID0_NTS);

1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776
	if (id & ID0_S1TS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
		dev_notice(smmu->dev, "\tstage 1 translation\n");
	}

	if (id & ID0_S2TS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
		dev_notice(smmu->dev, "\tstage 2 translation\n");
	}

	if (id & ID0_NTS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
		dev_notice(smmu->dev, "\tnested translation\n");
	}

	if (!(smmu->features &
1777
		(ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
1778 1779 1780 1781
		dev_err(smmu->dev, "\tno translation support!\n");
		return -ENODEV;
	}

1782 1783
	if ((id & ID0_S1TS) &&
		((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
1784 1785 1786 1787
		smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
		dev_notice(smmu->dev, "\taddress translation ops\n");
	}

1788 1789
	/*
	 * In order for DMA API calls to work properly, we must defer to what
1790
	 * the FW says about coherency, regardless of what the hardware claims.
1791 1792 1793 1794
	 * Fortunately, this also opens up a workaround for systems where the
	 * ID register value has ended up configured incorrectly.
	 */
	cttw_reg = !!(id & ID0_CTTW);
1795
	if (cttw_fw || cttw_reg)
1796
		dev_notice(smmu->dev, "\t%scoherent table walk\n",
1797 1798
			   cttw_fw ? "" : "non-");
	if (cttw_fw != cttw_reg)
1799
		dev_notice(smmu->dev,
1800
			   "\t(IDR0.CTTW overridden by FW configuration)\n");
1801

1802
	/* Max. number of entries we have for stream matching/indexing */
1803 1804 1805 1806 1807 1808
	if (smmu->version == ARM_SMMU_V2 && id & ID0_EXIDS) {
		smmu->features |= ARM_SMMU_FEAT_EXIDS;
		size = 1 << 16;
	} else {
		size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);
	}
1809
	smmu->streamid_mask = size - 1;
1810 1811
	if (id & ID0_SMS) {
		smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
1812 1813
		size = (id >> ID0_NUMSMRG_SHIFT) & ID0_NUMSMRG_MASK;
		if (size == 0) {
1814 1815 1816 1817 1818
			dev_err(smmu->dev,
				"stream-matching supported, but no SMRs present!\n");
			return -ENODEV;
		}

1819 1820 1821 1822 1823 1824
		/* Zero-initialised to mark as invalid */
		smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
					  GFP_KERNEL);
		if (!smmu->smrs)
			return -ENOMEM;

1825
		dev_notice(smmu->dev,
1826
			   "\tstream matching with %lu register groups", size);
1827
	}
1828 1829 1830 1831 1832 1833 1834 1835
	/* s2cr->type == 0 means translation, so initialise explicitly */
	smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
					 GFP_KERNEL);
	if (!smmu->s2crs)
		return -ENOMEM;
	for (i = 0; i < size; i++)
		smmu->s2crs[i] = s2cr_init_val;

1836
	smmu->num_mapping_groups = size;
1837
	mutex_init(&smmu->stream_map_mutex);
1838
	spin_lock_init(&smmu->global_sync_lock);
1839

1840 1841 1842 1843 1844 1845
	if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
		smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
		if (!(id & ID0_PTFS_NO_AARCH32S))
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
	}

1846 1847
	/* ID1 */
	id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
1848
	smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;
1849

1850
	/* Check for size mismatch of SMMU address space from mapped region */
1851
	size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
1852 1853
	size <<= smmu->pgshift;
	if (smmu->cb_base != gr0_base + size)
1854
		dev_warn(smmu->dev,
1855 1856
			"SMMU address space size (0x%lx) differs from mapped region size (0x%tx)!\n",
			size * 2, (smmu->cb_base - gr0_base) * 2);
1857

1858
	smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK;
1859 1860 1861 1862 1863 1864 1865
	smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK;
	if (smmu->num_s2_context_banks > smmu->num_context_banks) {
		dev_err(smmu->dev, "impossible number of S2 context banks!\n");
		return -ENODEV;
	}
	dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
		   smmu->num_context_banks, smmu->num_s2_context_banks);
1866 1867 1868 1869 1870 1871 1872 1873 1874 1875
	/*
	 * Cavium CN88xx erratum #27704.
	 * Ensure ASID and VMID allocation is unique across all SMMUs in
	 * the system.
	 */
	if (smmu->model == CAVIUM_SMMUV2) {
		smmu->cavium_id_base =
			atomic_add_return(smmu->num_context_banks,
					  &cavium_smmu_context_count);
		smmu->cavium_id_base -= smmu->num_context_banks;
1876
		dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n");
1877
	}
1878 1879 1880 1881
	smmu->cbs = devm_kcalloc(smmu->dev, smmu->num_context_banks,
				 sizeof(*smmu->cbs), GFP_KERNEL);
	if (!smmu->cbs)
		return -ENOMEM;
1882 1883 1884 1885

	/* ID2 */
	id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
	size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK);
1886
	smmu->ipa_size = size;
1887

1888
	/* The output mask is also applied for bypass */
1889
	size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK);
1890
	smmu->pa_size = size;
1891

1892 1893 1894
	if (id & ID2_VMID16)
		smmu->features |= ARM_SMMU_FEAT_VMID16;

1895 1896 1897 1898 1899 1900 1901 1902 1903
	/*
	 * What the page table walker can address actually depends on which
	 * descriptor format is in use, but since a) we don't know that yet,
	 * and b) it can vary per context bank, this will have to do...
	 */
	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
		dev_warn(smmu->dev,
			 "failed to set DMA mask for table walker\n");

1904
	if (smmu->version < ARM_SMMU_V2) {
1905
		smmu->va_size = smmu->ipa_size;
1906 1907
		if (smmu->version == ARM_SMMU_V1_64K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1908 1909
	} else {
		size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
1910 1911
		smmu->va_size = arm_smmu_id_size_to_bits(size);
		if (id & ID2_PTFS_4K)
1912
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
1913
		if (id & ID2_PTFS_16K)
1914
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
1915
		if (id & ID2_PTFS_64K)
1916
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
1917 1918
	}

1919 1920
	/* Now we've corralled the various formats, what'll it do? */
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
1921
		smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
1922 1923
	if (smmu->features &
	    (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
1924
		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
1925
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
1926
		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
1927
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
1928 1929 1930 1931 1932 1933 1934 1935
		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;

	if (arm_smmu_ops.pgsize_bitmap == -1UL)
		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
	else
		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
	dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
		   smmu->pgsize_bitmap);
1936

1937

1938 1939
	if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
		dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
1940
			   smmu->va_size, smmu->ipa_size);
1941 1942 1943

	if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
		dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
1944
			   smmu->ipa_size, smmu->pa_size);
1945

1946 1947 1948
	return 0;
}

1949 1950 1951 1952 1953 1954 1955 1956 1957 1958
/*
 * Per-compatible match data: the SMMU architecture version and the specific
 * implementation that a firmware "compatible" entry stands for.
 */
struct arm_smmu_match_data {
	enum arm_smmu_arch_version version;
	enum arm_smmu_implementation model;
};

#define ARM_SMMU_MATCH_DATA(name, ver, imp)	\
static struct arm_smmu_match_data name = { .version = ver, .model = imp }

ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
1959
ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
1960
ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
1961
ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);
1962

1963
static const struct of_device_id arm_smmu_of_match[] = {
1964 1965 1966
	{ .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
	{ .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
	{ .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
1967
	{ .compatible = "arm,mmu-401", .data = &arm_mmu401 },
1968
	{ .compatible = "arm,mmu-500", .data = &arm_mmu500 },
1969
	{ .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
1970 1971 1972
	{ },
};

1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983
#ifdef CONFIG_ACPI
/*
 * Translate an IORT SMMU model identifier into the driver's own version and
 * implementation fields on @smmu.
 *
 * Returns 0 when @model is recognised, -ENODEV otherwise (in which case
 * @smmu is left untouched).
 */
static int acpi_smmu_get_data(u32 model, struct arm_smmu_device *smmu)
{
	switch (model) {
	case ACPI_IORT_SMMU_V1:
	case ACPI_IORT_SMMU_CORELINK_MMU400:
		smmu->version = ARM_SMMU_V1;
		smmu->model = GENERIC_SMMU;
		return 0;
	case ACPI_IORT_SMMU_CORELINK_MMU401:
		smmu->version = ARM_SMMU_V1_64K;
		smmu->model = GENERIC_SMMU;
		return 0;
	case ACPI_IORT_SMMU_V2:
		smmu->version = ARM_SMMU_V2;
		smmu->model = GENERIC_SMMU;
		return 0;
	case ACPI_IORT_SMMU_CORELINK_MMU500:
		smmu->version = ARM_SMMU_V2;
		smmu->model = ARM_MMU500;
		return 0;
	case ACPI_IORT_SMMU_CAVIUM_THUNDERX:
		smmu->version = ARM_SMMU_V2;
		smmu->model = CAVIUM_SMMUV2;
		return 0;
	default:
		return -ENODEV;
	}
}

/*
 * ACPI flavour of the firmware probe: read the IORT node handed over as
 * platform data and use it to fill in the version, model, global IRQ count
 * and coherent-walk feature of @smmu.
 *
 * Returns 0 on success or the negative error from acpi_smmu_get_data().
 */
static int arm_smmu_device_acpi_probe(struct platform_device *pdev,
				      struct arm_smmu_device *smmu)
{
	struct acpi_iort_node **pdata;
	struct acpi_iort_node *iort_node;
	struct acpi_iort_smmu *smmu_data;
	int err;

	/* The platform data is a pointer to the IORT node pointer */
	pdata = (struct acpi_iort_node **)dev_get_platdata(smmu->dev);
	iort_node = *pdata;

	/* Retrieve SMMU1/2 specific data */
	smmu_data = (struct acpi_iort_smmu *)iort_node->node_data;

	err = acpi_smmu_get_data(smmu_data->model, smmu);
	if (err < 0)
		return err;

	/* Ignore the configuration access interrupt */
	smmu->num_global_irqs = 1;

	if (smmu_data->flags & ACPI_IORT_SMMU_COHERENT_WALK)
		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;

	return 0;
}
#else
/*
 * Stub built when CONFIG_ACPI is not set: there is no ACPI description to
 * probe from, so always report -ENODEV.
 */
static inline int arm_smmu_device_acpi_probe(struct platform_device *pdev,
					     struct arm_smmu_device *smmu)
{
	return -ENODEV;
}
#endif

2039 2040
static int arm_smmu_device_dt_probe(struct platform_device *pdev,
				    struct arm_smmu_device *smmu)
2041
{
2042
	const struct arm_smmu_match_data *data;
2043
	struct device *dev = &pdev->dev;
2044 2045
	bool legacy_binding;

2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057
	if (of_property_read_u32(dev->of_node, "#global-interrupts",
				 &smmu->num_global_irqs)) {
		dev_err(dev, "missing #global-interrupts property\n");
		return -ENODEV;
	}

	data = of_device_get_match_data(dev);
	smmu->version = data->version;
	smmu->model = data->model;

	parse_driver_options(smmu);

2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068
	legacy_binding = of_find_property(dev->of_node, "mmu-masters", NULL);
	if (legacy_binding && !using_generic_binding) {
		if (!using_legacy_binding)
			pr_notice("deprecated \"mmu-masters\" DT property in use; DMA API support unavailable\n");
		using_legacy_binding = true;
	} else if (!legacy_binding && !using_legacy_binding) {
		using_generic_binding = true;
	} else {
		dev_err(dev, "not probing due to mismatched DT properties\n");
		return -ENODEV;
	}
2069

2070 2071 2072 2073 2074 2075
	if (of_dma_is_coherent(dev->of_node))
		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;

	return 0;
}

2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090
/*
 * Install arm_smmu_ops as the IOMMU ops on every bus type this driver
 * serves (platform, and AMBA / PCI / FSL-MC when configured), skipping any
 * bus that already has an IOMMU present.
 */
static void arm_smmu_bus_init(void)
{
	const struct iommu_ops *ops = &arm_smmu_ops;

	/* Oh, for a proper bus abstraction */
	if (!iommu_present(&platform_bus_type))
		bus_set_iommu(&platform_bus_type, ops);

#ifdef CONFIG_ARM_AMBA
	if (!iommu_present(&amba_bustype))
		bus_set_iommu(&amba_bustype, ops);
#endif

#ifdef CONFIG_PCI
	if (!iommu_present(&pci_bus_type)) {
		/* ACS is requested before the ops are installed on PCI */
		pci_request_acs();
		bus_set_iommu(&pci_bus_type, ops);
	}
#endif

#ifdef CONFIG_FSL_MC_BUS
	if (!iommu_present(&fsl_mc_bus_type))
		bus_set_iommu(&fsl_mc_bus_type, ops);
#endif
}

2097 2098 2099
static int arm_smmu_device_probe(struct platform_device *pdev)
{
	struct resource *res;
2100
	resource_size_t ioaddr;
2101 2102 2103 2104
	struct arm_smmu_device *smmu;
	struct device *dev = &pdev->dev;
	int num_irqs, i, err;

2105 2106 2107 2108 2109 2110 2111
	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
	if (!smmu) {
		dev_err(dev, "failed to allocate arm_smmu_device\n");
		return -ENOMEM;
	}
	smmu->dev = dev;

2112 2113 2114 2115 2116
	if (dev->of_node)
		err = arm_smmu_device_dt_probe(pdev, smmu);
	else
		err = arm_smmu_device_acpi_probe(pdev, smmu);

2117 2118
	if (err)
		return err;
2119

2120
	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
2121
	ioaddr = res->start;
2122 2123 2124
	smmu->base = devm_ioremap_resource(dev, res);
	if (IS_ERR(smmu->base))
		return PTR_ERR(smmu->base);
2125
	smmu->cb_base = smmu->base + resource_size(res) / 2;
2126 2127 2128 2129 2130 2131 2132 2133

	num_irqs = 0;
	while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
		num_irqs++;
		if (num_irqs > smmu->num_global_irqs)
			smmu->num_context_irqs++;
	}

2134 2135 2136 2137
	if (!smmu->num_context_irqs) {
		dev_err(dev, "found %d interrupts but expected at least %d\n",
			num_irqs, smmu->num_global_irqs + 1);
		return -ENODEV;
2138 2139
	}

2140
	smmu->irqs = devm_kcalloc(dev, num_irqs, sizeof(*smmu->irqs),
2141 2142 2143 2144 2145 2146 2147 2148
				  GFP_KERNEL);
	if (!smmu->irqs) {
		dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
		return -ENOMEM;
	}

	for (i = 0; i < num_irqs; ++i) {
		int irq = platform_get_irq(pdev, i);
2149

2150 2151 2152 2153 2154 2155 2156
		if (irq < 0) {
			dev_err(dev, "failed to get irq index %d\n", i);
			return -ENODEV;
		}
		smmu->irqs[i] = irq;
	}

2157 2158 2159 2160
	err = arm_smmu_device_cfg_probe(smmu);
	if (err)
		return err;

2161 2162 2163 2164 2165 2166 2167 2168 2169 2170
	if (smmu->version == ARM_SMMU_V2) {
		if (smmu->num_context_banks > smmu->num_context_irqs) {
			dev_err(dev,
			      "found only %d context irq(s) but %d required\n",
			      smmu->num_context_irqs, smmu->num_context_banks);
			return -ENODEV;
		}

		/* Ignore superfluous interrupts */
		smmu->num_context_irqs = smmu->num_context_banks;
2171 2172 2173
	}

	for (i = 0; i < smmu->num_global_irqs; ++i) {
2174 2175 2176 2177 2178
		err = devm_request_irq(smmu->dev, smmu->irqs[i],
				       arm_smmu_global_fault,
				       IRQF_SHARED,
				       "arm-smmu global fault",
				       smmu);
2179 2180 2181
		if (err) {
			dev_err(dev, "failed to request global IRQ %d (%u)\n",
				i, smmu->irqs[i]);
2182
			return err;
2183 2184 2185
		}
	}

2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201
	err = iommu_device_sysfs_add(&smmu->iommu, smmu->dev, NULL,
				     "smmu.%pa", &ioaddr);
	if (err) {
		dev_err(dev, "Failed to register iommu in sysfs\n");
		return err;
	}

	iommu_device_set_ops(&smmu->iommu, &arm_smmu_ops);
	iommu_device_set_fwnode(&smmu->iommu, dev->fwnode);

	err = iommu_device_register(&smmu->iommu);
	if (err) {
		dev_err(dev, "Failed to register iommu\n");
		return err;
	}

2202
	platform_set_drvdata(pdev, smmu);
2203
	arm_smmu_device_reset(smmu);
2204
	arm_smmu_test_smr_masks(smmu);
2205

2206 2207 2208 2209 2210 2211 2212 2213
	/*
	 * For ACPI and generic DT bindings, an SMMU will be probed before
	 * any device which might need it, so we want the bus ops in place
	 * ready to handle default domain setup as soon as any SMMU exists.
	 */
	if (!using_legacy_binding)
		arm_smmu_bus_init();

2214 2215 2216
	return 0;
}

2217 2218 2219 2220 2221 2222 2223 2224 2225 2226
/*
 * The legacy DT binding gives no guarantee about probe order. Since default
 * domains are not used in that mode either, installing the bus ops can be
 * deferred to this late initcall, by which point every possible SMMU is
 * ready and no add_device() call can be missed.
 */
static int arm_smmu_legacy_bus_init(void)
{
	/* Nothing to do here unless the legacy binding is in use */
	if (!using_legacy_binding)
		return 0;

	arm_smmu_bus_init();
	return 0;
}
device_initcall_sync(arm_smmu_legacy_bus_init);
2230

2231
static void arm_smmu_device_shutdown(struct platform_device *pdev)
2232
{
2233
	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);
2234 2235

	if (!smmu)
2236
		return;
2237

2238
	if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
2239
		dev_err(&pdev->dev, "removing device with active domains!\n");
2240 2241

	/* Turn the thing off */
2242
	writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
2243 2244
}

2245 2246 2247 2248 2249 2250 2251 2252 2253 2254
/*
 * System resume callback: re-run the SMMU reset sequence on the instance
 * stored as driver data of @dev. Always returns 0.
 */
static int __maybe_unused arm_smmu_pm_resume(struct device *dev)
{
	arm_smmu_device_reset(dev_get_drvdata(dev));

	return 0;
}

/* Only a resume handler is provided; there is no suspend callback. */
static SIMPLE_DEV_PM_OPS(arm_smmu_pm_ops, NULL, arm_smmu_pm_resume);

2255 2256
static struct platform_driver arm_smmu_driver = {
	.driver	= {
2257 2258 2259 2260
		.name			= "arm-smmu",
		.of_match_table		= of_match_ptr(arm_smmu_of_match),
		.pm			= &arm_smmu_pm_ops,
		.suppress_bind_attrs	= true,
2261
	},
2262
	.probe	= arm_smmu_device_probe,
2263
	.shutdown = arm_smmu_device_shutdown,
2264
};
2265
builtin_platform_driver(arm_smmu_driver);