/*
 * IOMMU API for ARM architected SMMU implementations.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) 2013 ARM Limited
 *
 * Author: Will Deacon <will.deacon@arm.com>
 *
 * This driver currently supports:
 *	- SMMUv1 and v2 implementations
 *	- Stream-matching and stream-indexing
 *	- v7/v8 long-descriptor format
 *	- Non-secure access to the SMMU
 *	- Context fault reporting
 */

#define pr_fmt(fmt) "arm-smmu: " fmt

#include <linux/atomic.h>
#include <linux/delay.h>
#include <linux/dma-iommu.h>
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/io-64-nonatomic-hi-lo.h>
#include <linux/iommu.h>
#include <linux/iopoll.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_device.h>
#include <linux/pci.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

#include <linux/amba/bus.h>

#include "io-pgtable.h"

/* Maximum number of stream IDs assigned to a single device */
#define MAX_MASTER_STREAMIDS		128

/* Maximum number of context banks per SMMU */
#define ARM_SMMU_MAX_CBS		128

/* SMMU global address space */
#define ARM_SMMU_GR0(smmu)		((smmu)->base)
#define ARM_SMMU_GR1(smmu)		((smmu)->base + (1 << (smmu)->pgshift))

/*
 * SMMU global address space with conditional offset to access secure
 * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448,
 * nsGFSYNR0: 0x450)
 */
#define ARM_SMMU_GR0_NS(smmu)						\
	((smmu)->base +							\
		((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS)	\
			? 0x400 : 0))

/*
 * Some 64-bit registers only make sense to write atomically, but in such
 * cases all the data relevant to AArch32 formats lies within the lower word,
 * therefore this actually makes more sense than it might first appear.
 */
#ifdef CONFIG_64BIT
#define smmu_write_atomic_lq		writeq_relaxed
#else
#define smmu_write_atomic_lq		writel_relaxed
#endif

/* Configuration registers */
#define ARM_SMMU_GR0_sCR0		0x0
#define sCR0_CLIENTPD			(1 << 0)
#define sCR0_GFRE			(1 << 1)
#define sCR0_GFIE			(1 << 2)
#define sCR0_GCFGFRE			(1 << 4)
#define sCR0_GCFGFIE			(1 << 5)
#define sCR0_USFCFG			(1 << 10)
#define sCR0_VMIDPNE			(1 << 11)
#define sCR0_PTM			(1 << 12)
#define sCR0_FB				(1 << 13)
#define sCR0_VMID16EN			(1 << 31)
#define sCR0_BSU_SHIFT			14
#define sCR0_BSU_MASK			0x3

/* Auxiliary Configuration register */
#define ARM_SMMU_GR0_sACR		0x10

/* Identification registers */
#define ARM_SMMU_GR0_ID0		0x20
#define ARM_SMMU_GR0_ID1		0x24
#define ARM_SMMU_GR0_ID2		0x28
#define ARM_SMMU_GR0_ID3		0x2c
#define ARM_SMMU_GR0_ID4		0x30
#define ARM_SMMU_GR0_ID5		0x34
#define ARM_SMMU_GR0_ID6		0x38
#define ARM_SMMU_GR0_ID7		0x3c
#define ARM_SMMU_GR0_sGFSR		0x48
#define ARM_SMMU_GR0_sGFSYNR0		0x50
#define ARM_SMMU_GR0_sGFSYNR1		0x54
#define ARM_SMMU_GR0_sGFSYNR2		0x58

#define ID0_S1TS			(1 << 30)
#define ID0_S2TS			(1 << 29)
#define ID0_NTS				(1 << 28)
#define ID0_SMS				(1 << 27)
#define ID0_ATOSNS			(1 << 26)
#define ID0_PTFS_NO_AARCH32		(1 << 25)
#define ID0_PTFS_NO_AARCH32S		(1 << 24)
#define ID0_CTTW			(1 << 14)
#define ID0_NUMIRPT_SHIFT		16
#define ID0_NUMIRPT_MASK		0xff
#define ID0_NUMSIDB_SHIFT		9
#define ID0_NUMSIDB_MASK		0xf
#define ID0_NUMSMRG_SHIFT		0
#define ID0_NUMSMRG_MASK		0xff

#define ID1_PAGESIZE			(1 << 31)
#define ID1_NUMPAGENDXB_SHIFT		28
#define ID1_NUMPAGENDXB_MASK		7
#define ID1_NUMS2CB_SHIFT		16
#define ID1_NUMS2CB_MASK		0xff
#define ID1_NUMCB_SHIFT			0
#define ID1_NUMCB_MASK			0xff

#define ID2_OAS_SHIFT			4
#define ID2_OAS_MASK			0xf
#define ID2_IAS_SHIFT			0
#define ID2_IAS_MASK			0xf
#define ID2_UBS_SHIFT			8
#define ID2_UBS_MASK			0xf
#define ID2_PTFS_4K			(1 << 12)
#define ID2_PTFS_16K			(1 << 13)
#define ID2_PTFS_64K			(1 << 14)
#define ID2_VMID16			(1 << 15)

#define ID7_MAJOR_SHIFT			4
#define ID7_MAJOR_MASK			0xf

/* Global TLB invalidation */
#define ARM_SMMU_GR0_TLBIVMID		0x64
#define ARM_SMMU_GR0_TLBIALLNSNH	0x68
#define ARM_SMMU_GR0_TLBIALLH		0x6c
#define ARM_SMMU_GR0_sTLBGSYNC		0x70
#define ARM_SMMU_GR0_sTLBGSTATUS	0x74
#define sTLBGSTATUS_GSACTIVE		(1 << 0)
#define TLB_LOOP_TIMEOUT		1000000	/* 1s! */

/* Stream mapping registers */
#define ARM_SMMU_GR0_SMR(n)		(0x800 + ((n) << 2))
#define SMR_VALID			(1 << 31)
#define SMR_MASK_SHIFT			16
#define SMR_ID_SHIFT			0

#define ARM_SMMU_GR0_S2CR(n)		(0xc00 + ((n) << 2))
#define S2CR_CBNDX_SHIFT		0
#define S2CR_CBNDX_MASK			0xff
#define S2CR_TYPE_SHIFT			16
#define S2CR_TYPE_MASK			0x3
enum arm_smmu_s2cr_type {
	S2CR_TYPE_TRANS,
	S2CR_TYPE_BYPASS,
	S2CR_TYPE_FAULT,
};

#define S2CR_PRIVCFG_SHIFT		24
#define S2CR_PRIVCFG_MASK		0x3
enum arm_smmu_s2cr_privcfg {
	S2CR_PRIVCFG_DEFAULT,
	S2CR_PRIVCFG_DIPAN,
	S2CR_PRIVCFG_UNPRIV,
	S2CR_PRIVCFG_PRIV,
};

/* Context bank attribute registers */
#define ARM_SMMU_GR1_CBAR(n)		(0x0 + ((n) << 2))
#define CBAR_VMID_SHIFT			0
#define CBAR_VMID_MASK			0xff
#define CBAR_S1_BPSHCFG_SHIFT		8
#define CBAR_S1_BPSHCFG_MASK		3
#define CBAR_S1_BPSHCFG_NSH		3
#define CBAR_S1_MEMATTR_SHIFT		12
#define CBAR_S1_MEMATTR_MASK		0xf
#define CBAR_S1_MEMATTR_WB		0xf
#define CBAR_TYPE_SHIFT			16
#define CBAR_TYPE_MASK			0x3
#define CBAR_TYPE_S2_TRANS		(0 << CBAR_TYPE_SHIFT)
#define CBAR_TYPE_S1_TRANS_S2_BYPASS	(1 << CBAR_TYPE_SHIFT)
#define CBAR_TYPE_S1_TRANS_S2_FAULT	(2 << CBAR_TYPE_SHIFT)
#define CBAR_TYPE_S1_TRANS_S2_TRANS	(3 << CBAR_TYPE_SHIFT)
#define CBAR_IRPTNDX_SHIFT		24
#define CBAR_IRPTNDX_MASK		0xff

#define ARM_SMMU_GR1_CBA2R(n)		(0x800 + ((n) << 2))
#define CBA2R_RW64_32BIT		(0 << 0)
#define CBA2R_RW64_64BIT		(1 << 0)
#define CBA2R_VMID_SHIFT		16
#define CBA2R_VMID_MASK			0xffff

/* Translation context bank */
#define ARM_SMMU_CB_BASE(smmu)		((smmu)->base + ((smmu)->size >> 1))
#define ARM_SMMU_CB(smmu, n)		((n) * (1 << (smmu)->pgshift))

#define ARM_SMMU_CB_SCTLR		0x0
#define ARM_SMMU_CB_ACTLR		0x4
#define ARM_SMMU_CB_RESUME		0x8
#define ARM_SMMU_CB_TTBCR2		0x10
#define ARM_SMMU_CB_TTBR0		0x20
#define ARM_SMMU_CB_TTBR1		0x28
#define ARM_SMMU_CB_TTBCR		0x30
#define ARM_SMMU_CB_CONTEXTIDR		0x34
#define ARM_SMMU_CB_S1_MAIR0		0x38
#define ARM_SMMU_CB_S1_MAIR1		0x3c
#define ARM_SMMU_CB_PAR			0x50
#define ARM_SMMU_CB_FSR			0x58
#define ARM_SMMU_CB_FAR			0x60
#define ARM_SMMU_CB_FSYNR0		0x68
#define ARM_SMMU_CB_S1_TLBIVA		0x600
#define ARM_SMMU_CB_S1_TLBIASID		0x610
#define ARM_SMMU_CB_S1_TLBIVAL		0x620
#define ARM_SMMU_CB_S2_TLBIIPAS2	0x630
#define ARM_SMMU_CB_S2_TLBIIPAS2L	0x638
#define ARM_SMMU_CB_ATS1PR		0x800
#define ARM_SMMU_CB_ATSR		0x8f0

#define SCTLR_S1_ASIDPNE		(1 << 12)
#define SCTLR_CFCFG			(1 << 7)
#define SCTLR_CFIE			(1 << 6)
#define SCTLR_CFRE			(1 << 5)
#define SCTLR_E				(1 << 4)
#define SCTLR_AFE			(1 << 2)
#define SCTLR_TRE			(1 << 1)
#define SCTLR_M				(1 << 0)

#define ARM_MMU500_ACTLR_CPRE		(1 << 1)

#define ARM_MMU500_ACR_CACHE_LOCK	(1 << 26)

#define CB_PAR_F			(1 << 0)

#define ATSR_ACTIVE			(1 << 0)

#define RESUME_RETRY			(0 << 0)
#define RESUME_TERMINATE		(1 << 0)

#define TTBCR2_SEP_SHIFT		15
#define TTBCR2_SEP_UPSTREAM		(0x7 << TTBCR2_SEP_SHIFT)

#define TTBRn_ASID_SHIFT		48

#define FSR_MULTI			(1 << 31)
#define FSR_SS				(1 << 30)
#define FSR_UUT				(1 << 8)
#define FSR_ASF				(1 << 7)
#define FSR_TLBLKF			(1 << 6)
#define FSR_TLBMCF			(1 << 5)
#define FSR_EF				(1 << 4)
#define FSR_PF				(1 << 3)
#define FSR_AFF				(1 << 2)
#define FSR_TF				(1 << 1)

#define FSR_IGN				(FSR_AFF | FSR_ASF | \
					 FSR_TLBMCF | FSR_TLBLKF)
#define FSR_FAULT			(FSR_MULTI | FSR_SS | FSR_UUT | \
					 FSR_EF | FSR_PF | FSR_TF | FSR_IGN)

#define FSYNR0_WNR			(1 << 4)

static int force_stage;
module_param(force_stage, int, S_IRUGO);
MODULE_PARM_DESC(force_stage,
	"Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation.");
static bool disable_bypass;
module_param(disable_bypass, bool, S_IRUGO);
MODULE_PARM_DESC(disable_bypass,
	"Disable bypass streams such that incoming transactions from devices that are not attached to an iommu domain will report an abort back to the device and will not be allowed to pass through the SMMU.");

enum arm_smmu_arch_version {
	ARM_SMMU_V1,
	ARM_SMMU_V1_64K,
	ARM_SMMU_V2,
};

enum arm_smmu_implementation {
	GENERIC_SMMU,
	ARM_MMU500,
	CAVIUM_SMMUV2,
};

struct arm_smmu_s2cr {
	enum arm_smmu_s2cr_type		type;
	enum arm_smmu_s2cr_privcfg	privcfg;
	u8				cbndx;
};

#define s2cr_init_val (struct arm_smmu_s2cr){				\
	.type = disable_bypass ? S2CR_TYPE_FAULT : S2CR_TYPE_BYPASS,	\
}

struct arm_smmu_smr {
	u16				mask;
	u16				id;
	bool				valid;
};

struct arm_smmu_master_cfg {
	struct arm_smmu_device		*smmu;
	int				num_streamids;
	u16				streamids[MAX_MASTER_STREAMIDS];
	s16				smendx[MAX_MASTER_STREAMIDS];
};
#define INVALID_SMENDX			-1
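/*
 * Iterate over a master's stream IDs: i indexes cfg->streamids[] and idx is
 * the matching SMR/S2CR (SME) slot, or INVALID_SMENDX if none is allocated.
 */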
#define for_each_cfg_sme(cfg, i, idx) \
	for (i = 0; idx = cfg->smendx[i], i < cfg->num_streamids; ++i)

struct arm_smmu_device {
	struct device			*dev;

	void __iomem			*base;
	unsigned long			size;
	unsigned long			pgshift;

#define ARM_SMMU_FEAT_COHERENT_WALK	(1 << 0)
#define ARM_SMMU_FEAT_STREAM_MATCH	(1 << 1)
#define ARM_SMMU_FEAT_TRANS_S1		(1 << 2)
#define ARM_SMMU_FEAT_TRANS_S2		(1 << 3)
#define ARM_SMMU_FEAT_TRANS_NESTED	(1 << 4)
#define ARM_SMMU_FEAT_TRANS_OPS		(1 << 5)
#define ARM_SMMU_FEAT_VMID16		(1 << 6)
#define ARM_SMMU_FEAT_FMT_AARCH64_4K	(1 << 7)
#define ARM_SMMU_FEAT_FMT_AARCH64_16K	(1 << 8)
#define ARM_SMMU_FEAT_FMT_AARCH64_64K	(1 << 9)
#define ARM_SMMU_FEAT_FMT_AARCH32_L	(1 << 10)
#define ARM_SMMU_FEAT_FMT_AARCH32_S	(1 << 11)
	u32				features;

#define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0)
	u32				options;
	enum arm_smmu_arch_version	version;
	enum arm_smmu_implementation	model;

	u32				num_context_banks;
	u32				num_s2_context_banks;
	DECLARE_BITMAP(context_map, ARM_SMMU_MAX_CBS);
	atomic_t			irptndx;

	u32				num_mapping_groups;
	u16				streamid_mask;
	u16				smr_mask_mask;
	struct arm_smmu_smr		*smrs;
	struct arm_smmu_s2cr		*s2crs;

	unsigned long			va_size;
	unsigned long			ipa_size;
	unsigned long			pa_size;
	unsigned long			pgsize_bitmap;

	u32				num_global_irqs;
	u32				num_context_irqs;
	unsigned int			*irqs;

	u32				cavium_id_base; /* Specific to Cavium */
};

enum arm_smmu_context_fmt {
	ARM_SMMU_CTX_FMT_NONE,
	ARM_SMMU_CTX_FMT_AARCH64,
	ARM_SMMU_CTX_FMT_AARCH32_L,
	ARM_SMMU_CTX_FMT_AARCH32_S,
};

struct arm_smmu_cfg {
	u8				cbndx;
	u8				irptndx;
	u32				cbar;
	enum arm_smmu_context_fmt	fmt;
};
#define INVALID_IRPTNDX			0xff

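/*
 * A context bank's ASID/VMID is derived from its index; cavium_id_base is
 * non-zero only on Cavium SMMUs, where it keeps the values unique across
 * multiple SMMU instances (erratum #27704).
 */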
#define ARM_SMMU_CB_ASID(smmu, cfg) ((u16)(smmu)->cavium_id_base + (cfg)->cbndx)
#define ARM_SMMU_CB_VMID(smmu, cfg) ((u16)(smmu)->cavium_id_base + (cfg)->cbndx + 1)

enum arm_smmu_domain_stage {
	ARM_SMMU_DOMAIN_S1 = 0,
	ARM_SMMU_DOMAIN_S2,
	ARM_SMMU_DOMAIN_NESTED,
};

struct arm_smmu_domain {
	struct arm_smmu_device		*smmu;
	struct io_pgtable_ops		*pgtbl_ops;
	spinlock_t			pgtbl_lock;
	struct arm_smmu_cfg		cfg;
	enum arm_smmu_domain_stage	stage;
	struct mutex			init_mutex; /* Protects smmu pointer */
	struct iommu_domain		domain;
};

struct arm_smmu_option_prop {
	u32 opt;
	const char *prop;
};

static atomic_t cavium_smmu_context_count = ATOMIC_INIT(0);

static struct arm_smmu_option_prop arm_smmu_options[] = {
	{ ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" },
	{ 0, NULL},
};

static struct arm_smmu_domain *to_smmu_domain(struct iommu_domain *dom)
{
	return container_of(dom, struct arm_smmu_domain, domain);
}

static void parse_driver_options(struct arm_smmu_device *smmu)
{
	int i = 0;

	do {
		if (of_property_read_bool(smmu->dev->of_node,
						arm_smmu_options[i].prop)) {
			smmu->options |= arm_smmu_options[i].opt;
			dev_notice(smmu->dev, "option %s\n",
				arm_smmu_options[i].prop);
		}
	} while (arm_smmu_options[++i].opt);
}

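/*
 * For PCI devices the legacy "mmu-masters" binding lives on the host
 * bridge's parent node, so walk up to the root bus before taking the of_node.
 */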
static struct device_node *dev_get_dev_node(struct device *dev)
{
	if (dev_is_pci(dev)) {
		struct pci_bus *bus = to_pci_dev(dev)->bus;

		while (!pci_is_root_bus(bus))
			bus = bus->parent;
		return of_node_get(bus->bridge->parent->of_node);
	}

	return of_node_get(dev->of_node);
}

static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data)
{
	*((__be32 *)data) = cpu_to_be32(alias);
	return 0; /* Continue walking */
}

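/*
 * driver_for_each_device() callback: find the SMMU whose "mmu-masters" list
 * references the node stashed in the phandle iterator, and return its device
 * through *data.
 */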
static int __find_legacy_master_phandle(struct device *dev, void *data)
{
	struct of_phandle_iterator *it = *(void **)data;
	struct device_node *np = it->node;
	int err;

	of_for_each_phandle(it, err, dev->of_node, "mmu-masters",
			    "#stream-id-cells", 0)
		if (it->node == np) {
			*(void **)data = dev;
			return 1;
		}
	it->node = np;
	return err == -ENOENT ? 0 : err;
}

static struct platform_driver arm_smmu_driver;

static int arm_smmu_register_legacy_master(struct device *dev)
{
	struct arm_smmu_device *smmu;
	struct arm_smmu_master_cfg *cfg;
	struct device_node *np;
	struct of_phandle_iterator it;
	void *data = &it;
	__be32 pci_sid;
	int err;

	np = dev_get_dev_node(dev);
	if (!np || !of_find_property(np, "#stream-id-cells", NULL)) {
		of_node_put(np);
		return -ENODEV;
	}

	it.node = np;
	err = driver_for_each_device(&arm_smmu_driver.driver, NULL, &data,
				     __find_legacy_master_phandle);
	of_node_put(np);
	if (err == 0)
		return -ENODEV;
	if (err < 0)
		return err;

	smmu = dev_get_drvdata(data);

	if (it.cur_count > MAX_MASTER_STREAMIDS) {
		dev_err(smmu->dev,
			"reached maximum number (%d) of stream IDs for master device %s\n",
			MAX_MASTER_STREAMIDS, dev_name(dev));
		return -ENOSPC;
	}
	if (dev_is_pci(dev)) {
		/* "mmu-masters" assumes Stream ID == Requester ID */
		pci_for_each_dma_alias(to_pci_dev(dev), __arm_smmu_get_pci_sid,
				       &pci_sid);
		it.cur = &pci_sid;
		it.cur_count = 1;
	}

	cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
	if (!cfg)
		return -ENOMEM;

	cfg->smmu = smmu;
	dev->archdata.iommu = cfg;

	while (it.cur_count--)
		cfg->streamids[cfg->num_streamids++] = be32_to_cpup(it.cur++);

	return 0;
}

static int __arm_smmu_alloc_bitmap(unsigned long *map, int start, int end)
{
	int idx;

	do {
		idx = find_next_zero_bit(map, end, start);
		if (idx == end)
			return -ENOSPC;
	} while (test_and_set_bit(idx, map));

	return idx;
}

static void __arm_smmu_free_bitmap(unsigned long *map, int idx)
{
	clear_bit(idx, map);
}

/* Wait for any pending TLB invalidations to complete */
static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
{
	int count = 0;
	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);

	writel_relaxed(0, gr0_base + ARM_SMMU_GR0_sTLBGSYNC);
	while (readl_relaxed(gr0_base + ARM_SMMU_GR0_sTLBGSTATUS)
	       & sTLBGSTATUS_GSACTIVE) {
		cpu_relax();
		if (++count == TLB_LOOP_TIMEOUT) {
			dev_err_ratelimited(smmu->dev,
			"TLB sync timed out -- SMMU may be deadlocked\n");
			return;
		}
		udelay(1);
	}
}

static void arm_smmu_tlb_sync(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	__arm_smmu_tlb_sync(smmu_domain->smmu);
}

static void arm_smmu_tlb_inv_context(void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
	void __iomem *base;

	if (stage1) {
		base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
		writel_relaxed(ARM_SMMU_CB_ASID(smmu, cfg),
			       base + ARM_SMMU_CB_S1_TLBIASID);
	} else {
		base = ARM_SMMU_GR0(smmu);
		writel_relaxed(ARM_SMMU_CB_VMID(smmu, cfg),
			       base + ARM_SMMU_GR0_TLBIVMID);
	}

	__arm_smmu_tlb_sync(smmu);
}

static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
					  size_t granule, bool leaf, void *cookie)
{
	struct arm_smmu_domain *smmu_domain = cookie;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
	void __iomem *reg;

	if (stage1) {
		reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
		reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;

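		/*
		 * AArch32 formats take the ASID in the low bits of the TLBIVA
		 * value; AArch64 encodes it in bits [63:48].
		 */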
		if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) {
			iova &= ~12UL;
			iova |= ARM_SMMU_CB_ASID(smmu, cfg);
			do {
				writel_relaxed(iova, reg);
				iova += granule;
			} while (size -= granule);
		} else {
			iova >>= 12;
			iova |= (u64)ARM_SMMU_CB_ASID(smmu, cfg) << 48;
			do {
				writeq_relaxed(iova, reg);
				iova += granule >> 12;
			} while (size -= granule);
		}
	} else if (smmu->version == ARM_SMMU_V2) {
		reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
		reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L :
			      ARM_SMMU_CB_S2_TLBIIPAS2;
		iova >>= 12;
		do {
			smmu_write_atomic_lq(iova, reg);
			iova += granule >> 12;
		} while (size -= granule);
	} else {
		reg = ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_TLBIVMID;
		writel_relaxed(ARM_SMMU_CB_VMID(smmu, cfg), reg);
	}
}

static struct iommu_gather_ops arm_smmu_gather_ops = {
	.tlb_flush_all	= arm_smmu_tlb_inv_context,
	.tlb_add_flush	= arm_smmu_tlb_inv_range_nosync,
	.tlb_sync	= arm_smmu_tlb_sync,
};

static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
{
	u32 fsr, fsynr;
	unsigned long iova;
	struct iommu_domain *domain = dev;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	void __iomem *cb_base;

	cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
	fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR);

	if (!(fsr & FSR_FAULT))
		return IRQ_NONE;

	fsynr = readl_relaxed(cb_base + ARM_SMMU_CB_FSYNR0);
	iova = readq_relaxed(cb_base + ARM_SMMU_CB_FAR);

	dev_err_ratelimited(smmu->dev,
	"Unhandled context fault: fsr=0x%x, iova=0x%08lx, fsynr=0x%x, cb=%d\n",
			    fsr, iova, fsynr, cfg->cbndx);

	writel(fsr, cb_base + ARM_SMMU_CB_FSR);
	return IRQ_HANDLED;
}

static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
{
	u32 gfsr, gfsynr0, gfsynr1, gfsynr2;
	struct arm_smmu_device *smmu = dev;
	void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu);

	gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
	gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
	gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
	gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);

	if (!gfsr)
		return IRQ_NONE;

	dev_err_ratelimited(smmu->dev,
		"Unexpected global fault, this could be serious\n");
	dev_err_ratelimited(smmu->dev,
		"\tGFSR 0x%08x, GFSYNR0 0x%08x, GFSYNR1 0x%08x, GFSYNR2 0x%08x\n",
		gfsr, gfsynr0, gfsynr1, gfsynr2);

	writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
	return IRQ_HANDLED;
}

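/* Program CBA2R/CBAR, the TTBRs, TTBCR, MAIRs and SCTLR for a context bank */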
static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain,
				       struct io_pgtable_cfg *pgtbl_cfg)
{
	u32 reg, reg2;
	u64 reg64;
	bool stage1;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	void __iomem *cb_base, *gr1_base;

	gr1_base = ARM_SMMU_GR1(smmu);
	stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
	cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);

	if (smmu->version > ARM_SMMU_V1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64)
			reg = CBA2R_RW64_64BIT;
		else
			reg = CBA2R_RW64_32BIT;
		/* 16-bit VMIDs live in CBA2R */
		if (smmu->features & ARM_SMMU_FEAT_VMID16)
			reg |= ARM_SMMU_CB_VMID(smmu, cfg) << CBA2R_VMID_SHIFT;

		writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(cfg->cbndx));
	}

	/* CBAR */
	reg = cfg->cbar;
	if (smmu->version < ARM_SMMU_V2)
		reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT;

	/*
	 * Use the weakest shareability/memory types, so they are
	 * overridden by the ttbcr/pte.
	 */
	if (stage1) {
		reg |= (CBAR_S1_BPSHCFG_NSH << CBAR_S1_BPSHCFG_SHIFT) |
			(CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
	} else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) {
		/* 8-bit VMIDs live in CBAR */
		reg |= ARM_SMMU_CB_VMID(smmu, cfg) << CBAR_VMID_SHIFT;
	}
	writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(cfg->cbndx));

	/* TTBRs */
	if (stage1) {
		u16 asid = ARM_SMMU_CB_ASID(smmu, cfg);

		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			reg = pgtbl_cfg->arm_v7s_cfg.ttbr[0];
			writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0);
			reg = pgtbl_cfg->arm_v7s_cfg.ttbr[1];
			writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1);
			writel_relaxed(asid, cb_base + ARM_SMMU_CB_CONTEXTIDR);
		} else {
			reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0];
			reg64 |= (u64)asid << TTBRn_ASID_SHIFT;
			writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0);
			reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1];
			reg64 |= (u64)asid << TTBRn_ASID_SHIFT;
			writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR1);
		}
	} else {
		reg64 = pgtbl_cfg->arm_lpae_s2_cfg.vttbr;
		writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0);
	}

	/* TTBCR */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			reg = pgtbl_cfg->arm_v7s_cfg.tcr;
			reg2 = 0;
		} else {
			reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr;
			reg2 = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32;
			reg2 |= TTBCR2_SEP_UPSTREAM;
		}
		if (smmu->version > ARM_SMMU_V1)
			writel_relaxed(reg2, cb_base + ARM_SMMU_CB_TTBCR2);
	} else {
		reg = pgtbl_cfg->arm_lpae_s2_cfg.vtcr;
	}
	writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR);

	/* MAIRs (stage-1 only) */
	if (stage1) {
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) {
			reg = pgtbl_cfg->arm_v7s_cfg.prrr;
			reg2 = pgtbl_cfg->arm_v7s_cfg.nmrr;
		} else {
			reg = pgtbl_cfg->arm_lpae_s1_cfg.mair[0];
			reg2 = pgtbl_cfg->arm_lpae_s1_cfg.mair[1];
		}
		writel_relaxed(reg, cb_base + ARM_SMMU_CB_S1_MAIR0);
		writel_relaxed(reg2, cb_base + ARM_SMMU_CB_S1_MAIR1);
	}

	/* SCTLR */
	reg = SCTLR_CFIE | SCTLR_CFRE | SCTLR_AFE | SCTLR_TRE | SCTLR_M;
	if (stage1)
		reg |= SCTLR_S1_ASIDPNE;
#ifdef __BIG_ENDIAN
	reg |= SCTLR_E;
#endif
	writel_relaxed(reg, cb_base + ARM_SMMU_CB_SCTLR);
}

static int arm_smmu_init_domain_context(struct iommu_domain *domain,
					struct arm_smmu_device *smmu)
{
	int irq, start, ret = 0;
	unsigned long ias, oas;
	struct io_pgtable_ops *pgtbl_ops;
	struct io_pgtable_cfg pgtbl_cfg;
	enum io_pgtable_fmt fmt;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;

	mutex_lock(&smmu_domain->init_mutex);
	if (smmu_domain->smmu)
		goto out_unlock;

821 822 823 824 825 826
	/* We're bypassing these SIDs, so don't allocate an actual context */
	if (domain->type == IOMMU_DOMAIN_DMA) {
		smmu_domain->smmu = smmu;
		goto out_unlock;
	}

	/*
	 * Mapping the requested stage onto what we support is surprisingly
	 * complicated, mainly because the spec allows S1+S2 SMMUs without
	 * support for nested translation. That means we end up with the
	 * following table:
	 *
	 * Requested        Supported        Actual
	 *     S1               N              S1
	 *     S1             S1+S2            S1
	 *     S1               S2             S2
	 *     S1               S1             S1
	 *     N                N              N
	 *     N              S1+S2            S2
	 *     N                S2             S2
	 *     N                S1             S1
	 *
	 * Note that you can't actually request stage-2 mappings.
	 */
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S2;
	if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S2))
		smmu_domain->stage = ARM_SMMU_DOMAIN_S1;

	/*
	 * Choosing a suitable context format is even more fiddly. Until we
	 * grow some way for the caller to express a preference, and/or move
	 * the decision into the io-pgtable code where it arguably belongs,
	 * just aim for the closest thing to the rest of the system, and hope
	 * that the hardware isn't esoteric enough that we can't assume AArch64
	 * support to be a superset of AArch32 support...
	 */
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_L)
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_L;
	if (IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_ARMV7S) &&
	    !IS_ENABLED(CONFIG_64BIT) && !IS_ENABLED(CONFIG_ARM_LPAE) &&
	    (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S) &&
	    (smmu_domain->stage == ARM_SMMU_DOMAIN_S1))
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH32_S;
	if ((IS_ENABLED(CONFIG_64BIT) || cfg->fmt == ARM_SMMU_CTX_FMT_NONE) &&
	    (smmu->features & (ARM_SMMU_FEAT_FMT_AARCH64_64K |
			       ARM_SMMU_FEAT_FMT_AARCH64_16K |
			       ARM_SMMU_FEAT_FMT_AARCH64_4K)))
		cfg->fmt = ARM_SMMU_CTX_FMT_AARCH64;

	if (cfg->fmt == ARM_SMMU_CTX_FMT_NONE) {
		ret = -EINVAL;
		goto out_unlock;
	}

	switch (smmu_domain->stage) {
	case ARM_SMMU_DOMAIN_S1:
		cfg->cbar = CBAR_TYPE_S1_TRANS_S2_BYPASS;
		start = smmu->num_s2_context_banks;
		ias = smmu->va_size;
		oas = smmu->ipa_size;
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
			fmt = ARM_64_LPAE_S1;
		} else if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_L) {
			fmt = ARM_32_LPAE_S1;
			ias = min(ias, 32UL);
			oas = min(oas, 40UL);
		} else {
			fmt = ARM_V7S;
			ias = min(ias, 32UL);
			oas = min(oas, 32UL);
		}
		break;
	case ARM_SMMU_DOMAIN_NESTED:
		/*
		 * We will likely want to change this if/when KVM gets
		 * involved.
		 */
	case ARM_SMMU_DOMAIN_S2:
		cfg->cbar = CBAR_TYPE_S2_TRANS;
		start = 0;
		ias = smmu->ipa_size;
		oas = smmu->pa_size;
		if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) {
			fmt = ARM_64_LPAE_S2;
		} else {
			fmt = ARM_32_LPAE_S2;
			ias = min(ias, 40UL);
			oas = min(oas, 40UL);
		}
		break;
	default:
		ret = -EINVAL;
		goto out_unlock;
	}

	ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
				      smmu->num_context_banks);
	if (ret < 0)
		goto out_unlock;

	cfg->cbndx = ret;
	if (smmu->version < ARM_SMMU_V2) {
		cfg->irptndx = atomic_inc_return(&smmu->irptndx);
		cfg->irptndx %= smmu->num_context_irqs;
	} else {
		cfg->irptndx = cfg->cbndx;
	}

	pgtbl_cfg = (struct io_pgtable_cfg) {
		.pgsize_bitmap	= smmu->pgsize_bitmap,
		.ias		= ias,
		.oas		= oas,
		.tlb		= &arm_smmu_gather_ops,
		.iommu_dev	= smmu->dev,
	};

	smmu_domain->smmu = smmu;
	pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
	if (!pgtbl_ops) {
		ret = -ENOMEM;
		goto out_clear_smmu;
	}

	/* Update the domain's page sizes to reflect the page table format */
	domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;

	/* Initialise the context bank with our page table cfg */
	arm_smmu_init_context_bank(smmu_domain, &pgtbl_cfg);

	/*
	 * Request context fault interrupt. Do this last to avoid the
	 * handler seeing a half-initialised domain state.
	 */
	irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
	ret = devm_request_irq(smmu->dev, irq, arm_smmu_context_fault,
			       IRQF_SHARED, "arm-smmu-context-fault", domain);
	if (ret < 0) {
		dev_err(smmu->dev, "failed to request context IRQ %d (%u)\n",
			cfg->irptndx, irq);
		cfg->irptndx = INVALID_IRPTNDX;
	}

	mutex_unlock(&smmu_domain->init_mutex);

	/* Publish page table ops for map/unmap */
	smmu_domain->pgtbl_ops = pgtbl_ops;
	return 0;

out_clear_smmu:
	smmu_domain->smmu = NULL;
out_unlock:
	mutex_unlock(&smmu_domain->init_mutex);
	return ret;
}

static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	void __iomem *cb_base;
	int irq;

	if (!smmu || domain->type == IOMMU_DOMAIN_DMA)
		return;

	/*
	 * Disable the context bank and free the page tables before freeing
	 * it.
	 */
	cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
	writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);

	if (cfg->irptndx != INVALID_IRPTNDX) {
		irq = smmu->irqs[smmu->num_global_irqs + cfg->irptndx];
		devm_free_irq(smmu->dev, irq, domain);
	}

	free_io_pgtable_ops(smmu_domain->pgtbl_ops);
	__arm_smmu_free_bitmap(smmu->context_map, cfg->cbndx);
}

static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
{
	struct arm_smmu_domain *smmu_domain;

	if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA)
		return NULL;
	/*
	 * Allocate the domain and initialise some of its data structures.
	 * We can't really do anything meaningful until we've added a
	 * master.
	 */
	smmu_domain = kzalloc(sizeof(*smmu_domain), GFP_KERNEL);
	if (!smmu_domain)
		return NULL;

	if (type == IOMMU_DOMAIN_DMA &&
	    iommu_get_dma_cookie(&smmu_domain->domain)) {
		kfree(smmu_domain);
		return NULL;
	}

	mutex_init(&smmu_domain->init_mutex);
	spin_lock_init(&smmu_domain->pgtbl_lock);

	return &smmu_domain->domain;
}

static void arm_smmu_domain_free(struct iommu_domain *domain)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	/*
	 * Free the domain resources. We assume that all devices have
	 * already been detached.
	 */
	iommu_put_dma_cookie(domain);
	arm_smmu_destroy_domain_context(domain);
	kfree(smmu_domain);
}

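/* Claim the first free SMR slot, or return INVALID_SMENDX if all are in use */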
static int arm_smmu_alloc_smr(struct arm_smmu_device *smmu)
{
	int i;

	for (i = 0; i < smmu->num_mapping_groups; i++)
		if (!cmpxchg(&smmu->smrs[i].valid, false, true))
			return i;

	return INVALID_SMENDX;
}

static void arm_smmu_free_smr(struct arm_smmu_device *smmu, int idx)
{
	writel_relaxed(~SMR_VALID, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
	WRITE_ONCE(smmu->smrs[idx].valid, false);
}

static void arm_smmu_write_smr(struct arm_smmu_device *smmu, int idx)
{
	struct arm_smmu_smr *smr = smmu->smrs + idx;
	u32 reg = smr->id << SMR_ID_SHIFT | smr->mask << SMR_MASK_SHIFT;

	if (smr->valid)
		reg |= SMR_VALID;
	writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_SMR(idx));
}

static void arm_smmu_write_s2cr(struct arm_smmu_device *smmu, int idx)
{
	struct arm_smmu_s2cr *s2cr = smmu->s2crs + idx;
	u32 reg = (s2cr->type & S2CR_TYPE_MASK) << S2CR_TYPE_SHIFT |
		  (s2cr->cbndx & S2CR_CBNDX_MASK) << S2CR_CBNDX_SHIFT |
		  (s2cr->privcfg & S2CR_PRIVCFG_MASK) << S2CR_PRIVCFG_SHIFT;

	writel_relaxed(reg, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_S2CR(idx));
}

static void arm_smmu_write_sme(struct arm_smmu_device *smmu, int idx)
{
	arm_smmu_write_s2cr(smmu, idx);
	if (smmu->smrs)
		arm_smmu_write_smr(smmu, idx);
}

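/*
 * Allocate a stream-mapping entry for each of the master's stream IDs and,
 * on stream-matching hardware, program the corresponding SMRs; stream-indexed
 * SMMUs simply use the stream ID itself as the index.
 */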
static int arm_smmu_master_alloc_smes(struct arm_smmu_device *smmu,
				      struct arm_smmu_master_cfg *cfg)
{
	struct arm_smmu_smr *smrs = smmu->smrs;
	int i, idx;

	/* Allocate the SMRs on the SMMU */
	for_each_cfg_sme(cfg, i, idx) {
		if (idx != INVALID_SMENDX)
			return -EEXIST;

		/* ...except on stream indexing hardware, of course */
		if (!smrs) {
			cfg->smendx[i] = cfg->streamids[i];
			continue;
		}

		idx = arm_smmu_alloc_smr(smmu);
		if (idx < 0) {
			dev_err(smmu->dev, "failed to allocate free SMR\n");
			goto err_free_smrs;
		}
		cfg->smendx[i] = idx;

		smrs[idx].id = cfg->streamids[i];
		smrs[idx].mask = 0; /* We don't currently share SMRs */
	}

	if (!smrs)
		return 0;

	/* It worked! Now, poke the actual hardware */
	for_each_cfg_sme(cfg, i, idx)
		arm_smmu_write_smr(smmu, idx);

	return 0;

err_free_smrs:
	while (i--) {
		arm_smmu_free_smr(smmu, cfg->smendx[i]);
		cfg->smendx[i] = INVALID_SMENDX;
	}
	return -ENOSPC;
}

static void arm_smmu_master_free_smes(struct arm_smmu_master_cfg *cfg)
{
	struct arm_smmu_device *smmu = cfg->smmu;
	int i, idx;

	/*
	 * We *must* clear the S2CR first, because freeing the SMR means
	 * that it can be re-allocated immediately.
	 */
	for_each_cfg_sme(cfg, i, idx) {
		/* An IOMMU group is torn down by the first device to be removed */
		if (idx == INVALID_SMENDX)
			return;

		smmu->s2crs[idx] = s2cr_init_val;
		arm_smmu_write_s2cr(smmu, idx);
	}
	/* Sync S2CR updates before touching anything else */
	__iowmb();

	/* Invalidate the SMRs before freeing back to the allocator */
	for_each_cfg_sme(cfg, i, idx) {
		if (smmu->smrs)
			arm_smmu_free_smr(smmu, idx);

		cfg->smendx[i] = INVALID_SMENDX;
	}
}

static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain,
				      struct arm_smmu_master_cfg *cfg)
{
	int i, idx, ret = 0;
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_s2cr *s2cr = smmu->s2crs;
	enum arm_smmu_s2cr_type type = S2CR_TYPE_TRANS;
	u8 cbndx = smmu_domain->cfg.cbndx;

	if (cfg->smendx[0] == INVALID_SMENDX)
		ret = arm_smmu_master_alloc_smes(smmu, cfg);
	if (ret)
		return ret;

	/*
	 * FIXME: This won't be needed once we have IOMMU-backed DMA ops
	 * for all devices behind the SMMU. Note that we need to take
	 * care configuring SMRs for devices that are both a platform_device
	 * and a PCI device (i.e. a PCI host controller).
	 */
	if (smmu_domain->domain.type == IOMMU_DOMAIN_DMA)
		type = S2CR_TYPE_BYPASS;

	for_each_cfg_sme(cfg, i, idx) {
		/* Devices in an IOMMU group may already be configured */
		if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx)
			break;

		s2cr[idx].type = type;
		s2cr[idx].privcfg = S2CR_PRIVCFG_UNPRIV;
		s2cr[idx].cbndx = cbndx;
		arm_smmu_write_s2cr(smmu, idx);
	}
	return 0;
}

static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
{
	int ret;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_master_cfg *cfg = dev->archdata.iommu;

	if (!cfg) {
		dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n");
		return -ENXIO;
	}

	/* Ensure that the domain is finalised */
	ret = arm_smmu_init_domain_context(domain, cfg->smmu);
	if (ret < 0)
		return ret;

	/*
	 * Sanity check the domain. We don't support domains across
	 * different SMMUs.
	 */
	if (smmu_domain->smmu != cfg->smmu) {
		dev_err(dev,
			"cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
			dev_name(smmu_domain->smmu->dev), dev_name(cfg->smmu->dev));
		return -EINVAL;
	}

	/* Looks ok, so add the device to the domain */
	return arm_smmu_domain_add_master(smmu_domain, cfg);
}

static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova,
			phys_addr_t paddr, size_t size, int prot)
{
	int ret;
	unsigned long flags;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;

	if (!ops)
		return -ENODEV;

	spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
	ret = ops->map(ops, iova, paddr, size, prot);
	spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
	return ret;
}

static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
			     size_t size)
{
	size_t ret;
	unsigned long flags;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;

	if (!ops)
		return 0;

	spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
	ret = ops->unmap(ops, iova, size);
	spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);
	return ret;
}

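/*
 * Translate an IOVA with the hardware ATS1PR operation, falling back to a
 * software page-table walk if the ATSR poll times out.
 */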
static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain,
					      dma_addr_t iova)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
	struct device *dev = smmu->dev;
	void __iomem *cb_base;
	u32 tmp;
	u64 phys;
	unsigned long va;

	cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);

	/* ATS1 registers can only be written atomically */
	va = iova & ~0xfffUL;
	if (smmu->version == ARM_SMMU_V2)
		smmu_write_atomic_lq(va, cb_base + ARM_SMMU_CB_ATS1PR);
	else /* Register is only 32-bit in v1 */
		writel_relaxed(va, cb_base + ARM_SMMU_CB_ATS1PR);

	if (readl_poll_timeout_atomic(cb_base + ARM_SMMU_CB_ATSR, tmp,
				      !(tmp & ATSR_ACTIVE), 5, 50)) {
		dev_err(dev,
			"iova to phys timed out on %pad. Falling back to software table walk.\n",
			&iova);
		return ops->iova_to_phys(ops, iova);
	}

	phys = readq_relaxed(cb_base + ARM_SMMU_CB_PAR);
	if (phys & CB_PAR_F) {
		dev_err(dev, "translation fault!\n");
		dev_err(dev, "PAR = 0x%llx\n", phys);
		return 0;
	}

	return (phys & GENMASK_ULL(39, 12)) | (iova & 0xfff);
}

static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain,
					dma_addr_t iova)
{
	phys_addr_t ret;
	unsigned long flags;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;

	if (!ops)
		return 0;

	spin_lock_irqsave(&smmu_domain->pgtbl_lock, flags);
	if (smmu_domain->smmu->features & ARM_SMMU_FEAT_TRANS_OPS &&
			smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
		ret = arm_smmu_iova_to_phys_hard(domain, iova);
	} else {
		ret = ops->iova_to_phys(ops, iova);
	}

	spin_unlock_irqrestore(&smmu_domain->pgtbl_lock, flags);

	return ret;
}

static bool arm_smmu_capable(enum iommu_cap cap)
{
	switch (cap) {
	case IOMMU_CAP_CACHE_COHERENCY:
		/*
		 * Return true here as the SMMU can always send out coherent
		 * requests.
		 */
		return true;
	case IOMMU_CAP_INTR_REMAP:
		return true; /* MSIs are just memory writes */
	case IOMMU_CAP_NOEXEC:
		return true;
	default:
		return false;
	}
}

static int arm_smmu_add_device(struct device *dev)
{
	struct arm_smmu_master_cfg *cfg;
	struct iommu_group *group;
	int i, ret;

	ret = arm_smmu_register_legacy_master(dev);
	cfg = dev->archdata.iommu;
	if (ret)
		goto out_free;

	ret = -EINVAL;
	for (i = 0; i < cfg->num_streamids; i++) {
		u16 sid = cfg->streamids[i];

		if (sid & ~cfg->smmu->streamid_mask) {
			dev_err(dev, "stream ID 0x%x out of range for SMMU (0x%x)\n",
				sid, cfg->smmu->streamid_mask);
			goto out_free;
		}
		cfg->smendx[i] = INVALID_SMENDX;
	}

	group = iommu_group_get_for_dev(dev);
	if (IS_ERR(group)) {
		ret = PTR_ERR(group);
		goto out_free;
	}
	iommu_group_put(group);
	return 0;

out_free:
	kfree(cfg);
	dev->archdata.iommu = NULL;
	return ret;
}

static void arm_smmu_remove_device(struct device *dev)
{
	struct arm_smmu_master_cfg *cfg = dev->archdata.iommu;

	if (!cfg)
		return;

	arm_smmu_master_free_smes(cfg);
	iommu_group_remove_device(dev);
	kfree(cfg);
	dev->archdata.iommu = NULL;
}

static struct iommu_group *arm_smmu_device_group(struct device *dev)
{
	struct iommu_group *group;

	if (dev_is_pci(dev))
		group = pci_device_group(dev);
	else
		group = generic_device_group(dev);

	return group;
}

static int arm_smmu_domain_get_attr(struct iommu_domain *domain,
				    enum iommu_attr attr, void *data)
{
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	switch (attr) {
	case DOMAIN_ATTR_NESTING:
		*(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
		return 0;
	default:
		return -ENODEV;
	}
}

static int arm_smmu_domain_set_attr(struct iommu_domain *domain,
				    enum iommu_attr attr, void *data)
{
	int ret = 0;
	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);

	mutex_lock(&smmu_domain->init_mutex);

	switch (attr) {
	case DOMAIN_ATTR_NESTING:
		if (smmu_domain->smmu) {
			ret = -EPERM;
			goto out_unlock;
		}

		if (*(int *)data)
			smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
		else
			smmu_domain->stage = ARM_SMMU_DOMAIN_S1;

		break;
	default:
		ret = -ENODEV;
	}

out_unlock:
	mutex_unlock(&smmu_domain->init_mutex);
	return ret;
}

static struct iommu_ops arm_smmu_ops = {
	.capable		= arm_smmu_capable,
	.domain_alloc		= arm_smmu_domain_alloc,
	.domain_free		= arm_smmu_domain_free,
	.attach_dev		= arm_smmu_attach_dev,
	.map			= arm_smmu_map,
	.unmap			= arm_smmu_unmap,
	.map_sg			= default_iommu_map_sg,
	.iova_to_phys		= arm_smmu_iova_to_phys,
	.add_device		= arm_smmu_add_device,
	.remove_device		= arm_smmu_remove_device,
	.device_group		= arm_smmu_device_group,
	.domain_get_attr	= arm_smmu_domain_get_attr,
	.domain_set_attr	= arm_smmu_domain_set_attr,
	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
};

static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
{
	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
	void __iomem *cb_base;
	int i;
	u32 reg, major;

	/* clear global FSR */
	reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);
	writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR);

	/*
	 * Reset stream mapping groups: Initial values mark all SMRn as
	 * invalid and all S2CRn as bypass unless overridden.
	 */
	for (i = 0; i < smmu->num_mapping_groups; ++i)
		arm_smmu_write_sme(smmu, i);

	/*
	 * Before clearing ARM_MMU500_ACTLR_CPRE, need to
	 * clear CACHE_LOCK bit of ACR first. And, CACHE_LOCK
	 * bit is only present in MMU-500r2 onwards.
	 */
	reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID7);
	major = (reg >> ID7_MAJOR_SHIFT) & ID7_MAJOR_MASK;
	if ((smmu->model == ARM_MMU500) && (major >= 2)) {
		reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sACR);
		reg &= ~ARM_MMU500_ACR_CACHE_LOCK;
		writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sACR);
	}

	/* Make sure all context banks are disabled and clear CB_FSR  */
	for (i = 0; i < smmu->num_context_banks; ++i) {
		cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, i);
		writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
		writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR);
		/*
		 * Disable MMU-500's not-particularly-beneficial next-page
		 * prefetcher for the sake of errata #841119 and #826419.
		 */
		if (smmu->model == ARM_MMU500) {
			reg = readl_relaxed(cb_base + ARM_SMMU_CB_ACTLR);
			reg &= ~ARM_MMU500_ACTLR_CPRE;
			writel_relaxed(reg, cb_base + ARM_SMMU_CB_ACTLR);
		}
	}

	/* Invalidate the TLB, just in case */
	writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH);
	writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH);

	reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);

	/* Enable fault reporting */
	reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE);

	/* Disable TLB broadcasting. */
	reg |= (sCR0_VMIDPNE | sCR0_PTM);

	/* Enable client access, handling unmatched streams as appropriate */
	reg &= ~sCR0_CLIENTPD;
	if (disable_bypass)
		reg |= sCR0_USFCFG;
	else
		reg &= ~sCR0_USFCFG;

	/* Disable forced broadcasting */
	reg &= ~sCR0_FB;

	/* Don't upgrade barriers */
	reg &= ~(sCR0_BSU_MASK << sCR0_BSU_SHIFT);

	if (smmu->features & ARM_SMMU_FEAT_VMID16)
		reg |= sCR0_VMID16EN;

	/* Push the button */
	__arm_smmu_tlb_sync(smmu);
	writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
}

static int arm_smmu_id_size_to_bits(int size)
{
	switch (size) {
	case 0:
		return 32;
	case 1:
		return 36;
	case 2:
		return 40;
	case 3:
		return 42;
	case 4:
		return 44;
	case 5:
	default:
		return 48;
	}
}

static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu)
{
	unsigned long size;
	void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
	u32 id;
	bool cttw_dt, cttw_reg;
	int i;

	dev_notice(smmu->dev, "probing hardware configuration...\n");
	dev_notice(smmu->dev, "SMMUv%d with:\n",
			smmu->version == ARM_SMMU_V2 ? 2 : 1);

	/* ID0 */
	id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID0);

	/* Restrict available stages based on module parameter */
	if (force_stage == 1)
		id &= ~(ID0_S2TS | ID0_NTS);
	else if (force_stage == 2)
		id &= ~(ID0_S1TS | ID0_NTS);

	if (id & ID0_S1TS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_S1;
		dev_notice(smmu->dev, "\tstage 1 translation\n");
	}

	if (id & ID0_S2TS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_S2;
		dev_notice(smmu->dev, "\tstage 2 translation\n");
	}

	if (id & ID0_NTS) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_NESTED;
		dev_notice(smmu->dev, "\tnested translation\n");
	}

	if (!(smmu->features &
		(ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) {
		dev_err(smmu->dev, "\tno translation support!\n");
		return -ENODEV;
	}

	if ((id & ID0_S1TS) &&
		((smmu->version < ARM_SMMU_V2) || !(id & ID0_ATOSNS))) {
		smmu->features |= ARM_SMMU_FEAT_TRANS_OPS;
		dev_notice(smmu->dev, "\taddress translation ops\n");
	}

	/*
	 * In order for DMA API calls to work properly, we must defer to what
	 * the DT says about coherency, regardless of what the hardware claims.
	 * Fortunately, this also opens up a workaround for systems where the
	 * ID register value has ended up configured incorrectly.
	 */
	cttw_dt = of_dma_is_coherent(smmu->dev->of_node);
	cttw_reg = !!(id & ID0_CTTW);
	if (cttw_dt)
		smmu->features |= ARM_SMMU_FEAT_COHERENT_WALK;
	if (cttw_dt || cttw_reg)
		dev_notice(smmu->dev, "\t%scoherent table walk\n",
			   cttw_dt ? "" : "non-");
	if (cttw_dt != cttw_reg)
		dev_notice(smmu->dev,
			   "\t(IDR0.CTTW overridden by dma-coherent property)\n");

	/* Max. number of entries we have for stream matching/indexing */
	size = 1 << ((id >> ID0_NUMSIDB_SHIFT) & ID0_NUMSIDB_MASK);
	smmu->streamid_mask = size - 1;
	if (id & ID0_SMS) {
		u32 smr;

		smmu->features |= ARM_SMMU_FEAT_STREAM_MATCH;
		size = (id >> ID0_NUMSMRG_SHIFT) & ID0_NUMSMRG_MASK;
		if (size == 0) {
			dev_err(smmu->dev,
				"stream-matching supported, but no SMRs present!\n");
			return -ENODEV;
		}

		/*
		 * SMR.ID bits may not be preserved if the corresponding MASK
		 * bits are set, so check each one separately. We can reject
		 * masters later if they try to claim IDs outside these masks.
		 */
		smr = smmu->streamid_mask << SMR_ID_SHIFT;
		writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
		smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
		smmu->streamid_mask = smr >> SMR_ID_SHIFT;

		smr = smmu->streamid_mask << SMR_MASK_SHIFT;
		writel_relaxed(smr, gr0_base + ARM_SMMU_GR0_SMR(0));
		smr = readl_relaxed(gr0_base + ARM_SMMU_GR0_SMR(0));
		smmu->smr_mask_mask = smr >> SMR_MASK_SHIFT;

		/* Zero-initialised to mark as invalid */
		smmu->smrs = devm_kcalloc(smmu->dev, size, sizeof(*smmu->smrs),
					  GFP_KERNEL);
		if (!smmu->smrs)
			return -ENOMEM;

		dev_notice(smmu->dev,
			   "\tstream matching with %lu register groups, mask 0x%x",
			   size, smmu->smr_mask_mask);
	}
	/* s2cr->type == 0 means translation, so initialise explicitly */
	smmu->s2crs = devm_kmalloc_array(smmu->dev, size, sizeof(*smmu->s2crs),
					 GFP_KERNEL);
	if (!smmu->s2crs)
		return -ENOMEM;
	for (i = 0; i < size; i++)
		smmu->s2crs[i] = s2cr_init_val;

	smmu->num_mapping_groups = size;

	if (smmu->version < ARM_SMMU_V2 || !(id & ID0_PTFS_NO_AARCH32)) {
		smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_L;
		if (!(id & ID0_PTFS_NO_AARCH32S))
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH32_S;
	}

	/* ID1 */
	id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1);
	smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12;

	/* Check for size mismatch of SMMU address space from mapped region */
	size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1);
	size *= 2 << smmu->pgshift;
	if (smmu->size != size)
		dev_warn(smmu->dev,
			"SMMU address space size (0x%lx) differs from mapped region size (0x%lx)!\n",
			size, smmu->size);

	smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK;
	smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK;
	if (smmu->num_s2_context_banks > smmu->num_context_banks) {
		dev_err(smmu->dev, "impossible number of S2 context banks!\n");
		return -ENODEV;
	}
	dev_notice(smmu->dev, "\t%u context banks (%u stage-2 only)\n",
		   smmu->num_context_banks, smmu->num_s2_context_banks);
	/*
	 * Cavium CN88xx erratum #27704.
	 * Ensure ASID and VMID allocation is unique across all SMMUs in
	 * the system.
	 */
	if (smmu->model == CAVIUM_SMMUV2) {
		smmu->cavium_id_base =
			atomic_add_return(smmu->num_context_banks,
					  &cavium_smmu_context_count);
		smmu->cavium_id_base -= smmu->num_context_banks;
	}

	/* ID2 */
	id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2);
	size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK);
	smmu->ipa_size = size;

	/* The output mask is also applied for bypass */
	size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK);
	smmu->pa_size = size;

	if (id & ID2_VMID16)
		smmu->features |= ARM_SMMU_FEAT_VMID16;

	/*
	 * What the page table walker can address actually depends on which
	 * descriptor format is in use, but since a) we don't know that yet,
	 * and b) it can vary per context bank, this will have to do...
	 */
	if (dma_set_mask_and_coherent(smmu->dev, DMA_BIT_MASK(size)))
		dev_warn(smmu->dev,
			 "failed to set DMA mask for table walker\n");

	if (smmu->version < ARM_SMMU_V2) {
		smmu->va_size = smmu->ipa_size;
		if (smmu->version == ARM_SMMU_V1_64K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
	} else {
		size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK;
		smmu->va_size = arm_smmu_id_size_to_bits(size);
		if (id & ID2_PTFS_4K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_4K;
		if (id & ID2_PTFS_16K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_16K;
		if (id & ID2_PTFS_64K)
			smmu->features |= ARM_SMMU_FEAT_FMT_AARCH64_64K;
	}

	/* Now we've corralled the various formats, what'll it do? */
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH32_S)
		smmu->pgsize_bitmap |= SZ_4K | SZ_64K | SZ_1M | SZ_16M;
	if (smmu->features &
	    (ARM_SMMU_FEAT_FMT_AARCH32_L | ARM_SMMU_FEAT_FMT_AARCH64_4K))
		smmu->pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G;
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_16K)
		smmu->pgsize_bitmap |= SZ_16K | SZ_32M;
	if (smmu->features & ARM_SMMU_FEAT_FMT_AARCH64_64K)
		smmu->pgsize_bitmap |= SZ_64K | SZ_512M;

	if (arm_smmu_ops.pgsize_bitmap == -1UL)
		arm_smmu_ops.pgsize_bitmap = smmu->pgsize_bitmap;
	else
		arm_smmu_ops.pgsize_bitmap |= smmu->pgsize_bitmap;
	dev_notice(smmu->dev, "\tSupported page sizes: 0x%08lx\n",
		   smmu->pgsize_bitmap);


	if (smmu->features & ARM_SMMU_FEAT_TRANS_S1)
		dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n",
			   smmu->va_size, smmu->ipa_size);

	if (smmu->features & ARM_SMMU_FEAT_TRANS_S2)
		dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n",
			   smmu->ipa_size, smmu->pa_size);

	return 0;
}

struct arm_smmu_match_data {
	enum arm_smmu_arch_version version;
	enum arm_smmu_implementation model;
};

#define ARM_SMMU_MATCH_DATA(name, ver, imp)	\
static struct arm_smmu_match_data name = { .version = ver, .model = imp }

ARM_SMMU_MATCH_DATA(smmu_generic_v1, ARM_SMMU_V1, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(smmu_generic_v2, ARM_SMMU_V2, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu401, ARM_SMMU_V1_64K, GENERIC_SMMU);
ARM_SMMU_MATCH_DATA(arm_mmu500, ARM_SMMU_V2, ARM_MMU500);
ARM_SMMU_MATCH_DATA(cavium_smmuv2, ARM_SMMU_V2, CAVIUM_SMMUV2);

static const struct of_device_id arm_smmu_of_match[] = {
	{ .compatible = "arm,smmu-v1", .data = &smmu_generic_v1 },
	{ .compatible = "arm,smmu-v2", .data = &smmu_generic_v2 },
	{ .compatible = "arm,mmu-400", .data = &smmu_generic_v1 },
	{ .compatible = "arm,mmu-401", .data = &arm_mmu401 },
	{ .compatible = "arm,mmu-500", .data = &arm_mmu500 },
	{ .compatible = "cavium,smmu-v2", .data = &cavium_smmuv2 },
	{ },
};
MODULE_DEVICE_TABLE(of, arm_smmu_of_match);

static int arm_smmu_device_dt_probe(struct platform_device *pdev)
{
	const struct arm_smmu_match_data *data;
	struct resource *res;
	struct arm_smmu_device *smmu;
	struct device *dev = &pdev->dev;
	int num_irqs, i, err;

	smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL);
	if (!smmu) {
		dev_err(dev, "failed to allocate arm_smmu_device\n");
		return -ENOMEM;
	}
	smmu->dev = dev;

	data = of_device_get_match_data(dev);
	smmu->version = data->version;
	smmu->model = data->model;

	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
	smmu->base = devm_ioremap_resource(dev, res);
	if (IS_ERR(smmu->base))
		return PTR_ERR(smmu->base);
	smmu->size = resource_size(res);

	if (of_property_read_u32(dev->of_node, "#global-interrupts",
				 &smmu->num_global_irqs)) {
		dev_err(dev, "missing #global-interrupts property\n");
		return -ENODEV;
	}

	num_irqs = 0;
	while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) {
		num_irqs++;
		if (num_irqs > smmu->num_global_irqs)
			smmu->num_context_irqs++;
	}

	if (!smmu->num_context_irqs) {
		dev_err(dev, "found %d interrupts but expected at least %d\n",
			num_irqs, smmu->num_global_irqs + 1);
		return -ENODEV;
	}

	smmu->irqs = devm_kzalloc(dev, sizeof(*smmu->irqs) * num_irqs,
				  GFP_KERNEL);
	if (!smmu->irqs) {
		dev_err(dev, "failed to allocate %d irqs\n", num_irqs);
		return -ENOMEM;
	}

	for (i = 0; i < num_irqs; ++i) {
		int irq = platform_get_irq(pdev, i);

		if (irq < 0) {
			dev_err(dev, "failed to get irq index %d\n", i);
			return -ENODEV;
		}
		smmu->irqs[i] = irq;
	}

	err = arm_smmu_device_cfg_probe(smmu);
	if (err)
		return err;

	parse_driver_options(smmu);

	if (smmu->version == ARM_SMMU_V2 &&
	    smmu->num_context_banks != smmu->num_context_irqs) {
		dev_err(dev,
			"found only %d context interrupt(s) but %d required\n",
			smmu->num_context_irqs, smmu->num_context_banks);
		return -ENODEV;
	}

	for (i = 0; i < smmu->num_global_irqs; ++i) {
		err = devm_request_irq(smmu->dev, smmu->irqs[i],
				       arm_smmu_global_fault,
				       IRQF_SHARED,
				       "arm-smmu global fault",
				       smmu);
		if (err) {
			dev_err(dev, "failed to request global IRQ %d (%u)\n",
				i, smmu->irqs[i]);
			return err;
		}
	}

	platform_set_drvdata(pdev, smmu);
	arm_smmu_device_reset(smmu);
	return 0;
}

static int arm_smmu_device_remove(struct platform_device *pdev)
{
	struct arm_smmu_device *smmu = platform_get_drvdata(pdev);

	if (!smmu)
		return -ENODEV;

	if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
		dev_err(&pdev->dev, "removing device with active domains!\n");

	/* Turn the thing off */
	writel(sCR0_CLIENTPD, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0);
	return 0;
}

static struct platform_driver arm_smmu_driver = {
	.driver	= {
		.name		= "arm-smmu",
		.of_match_table	= of_match_ptr(arm_smmu_of_match),
	},
	.probe	= arm_smmu_device_dt_probe,
	.remove	= arm_smmu_device_remove,
};

static int __init arm_smmu_init(void)
{
	struct device_node *np;
	int ret;

	/*
	 * Play nice with systems that don't have an ARM SMMU by checking that
	 * an ARM SMMU exists in the system before proceeding with the driver
	 * and IOMMU bus operation registration.
	 */
	np = of_find_matching_node(NULL, arm_smmu_of_match);
	if (!np)
		return 0;

	of_node_put(np);

	ret = platform_driver_register(&arm_smmu_driver);
	if (ret)
		return ret;

	/* Oh, for a proper bus abstraction */
	if (!iommu_present(&platform_bus_type))
		bus_set_iommu(&platform_bus_type, &arm_smmu_ops);

#ifdef CONFIG_ARM_AMBA
	if (!iommu_present(&amba_bustype))
		bus_set_iommu(&amba_bustype, &arm_smmu_ops);
#endif

#ifdef CONFIG_PCI
	if (!iommu_present(&pci_bus_type)) {
		pci_request_acs();
		bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
	}
#endif

	return 0;
}

static void __exit arm_smmu_exit(void)
{
	return platform_driver_unregister(&arm_smmu_driver);
}

subsys_initcall(arm_smmu_init);
module_exit(arm_smmu_exit);

MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations");
MODULE_AUTHOR("Will Deacon <will.deacon@arm.com>");
MODULE_LICENSE("GPL v2");