io-pgtable-arm-v7s.c 26.0 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
/*
 * CPU-agnostic ARM page table allocator.
 *
 * ARMv7 Short-descriptor format, supporting
 * - Basic memory attributes
 * - Simplified access permissions (AP[2:1] model)
 * - Backwards-compatible TEX remap
 * - Large pages/supersections (if indicated by the caller)
 *
 * Not supporting:
 * - Legacy access permissions (AP[2:0] model)
 *
 * Almost certainly never supporting:
 * - PXN
 * - Domains
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 * Copyright (C) 2014-2015 ARM Limited
 * Copyright (c) 2014-2015 MediaTek Inc.
 */

#define pr_fmt(fmt)	"arm-v7s io-pgtable: " fmt

35
#include <linux/atomic.h>
36 37
#include <linux/dma-mapping.h>
#include <linux/gfp.h>
38
#include <linux/io-pgtable.h>
39 40 41 42 43
#include <linux/iommu.h>
#include <linux/kernel.h>
#include <linux/kmemleak.h>
#include <linux/sizes.h>
#include <linux/slab.h>
44
#include <linux/spinlock.h>
45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
#include <linux/types.h>

#include <asm/barrier.h>

/* Struct accessors */
#define io_pgtable_to_data(x)						\
	container_of((x), struct arm_v7s_io_pgtable, iop)

#define io_pgtable_ops_to_data(x)					\
	io_pgtable_to_data(io_pgtable_ops_to_pgtable(x))

/*
 * We have 32 bits total; 12 bits resolved at level 1, 8 bits at level 2,
 * and 12 bits in a page. With some carefully-chosen coefficients we can
 * hide the ugly inconsistencies behind these macros and at least let the
 * rest of the code pretend to be somewhat sane.
 */
#define ARM_V7S_ADDR_BITS		32
#define _ARM_V7S_LVL_BITS(lvl)		(16 - (lvl) * 4)
#define ARM_V7S_LVL_SHIFT(lvl)		(ARM_V7S_ADDR_BITS - (4 + 8 * (lvl)))
#define ARM_V7S_TABLE_SHIFT		10

#define ARM_V7S_PTES_PER_LVL(lvl)	(1 << _ARM_V7S_LVL_BITS(lvl))
#define ARM_V7S_TABLE_SIZE(lvl)						\
	(ARM_V7S_PTES_PER_LVL(lvl) * sizeof(arm_v7s_iopte))

#define ARM_V7S_BLOCK_SIZE(lvl)		(1UL << ARM_V7S_LVL_SHIFT(lvl))
#define ARM_V7S_LVL_MASK(lvl)		((u32)(~0U << ARM_V7S_LVL_SHIFT(lvl)))
#define ARM_V7S_TABLE_MASK		((u32)(~0U << ARM_V7S_TABLE_SHIFT))
#define _ARM_V7S_IDX_MASK(lvl)		(ARM_V7S_PTES_PER_LVL(lvl) - 1)
#define ARM_V7S_LVL_IDX(addr, lvl)	({				\
	int _l = lvl;							\
	((u32)(addr) >> ARM_V7S_LVL_SHIFT(_l)) & _ARM_V7S_IDX_MASK(_l); \
})

/*
 * Large page/supersection entries are effectively a block of 16 page/section
 * entries, along the lines of the LPAE contiguous hint, but all with the
 * same output address. For want of a better common name we'll call them
 * "contiguous" versions of their respective page/section entries here, but
 * noting the distinction (WRT to TLB maintenance) that they represent *one*
 * entry repeated 16 times, not 16 separate entries (as in the LPAE case).
 */
#define ARM_V7S_CONT_PAGES		16

/* PTE type bits: these are all mixed up with XN/PXN bits in most cases */
#define ARM_V7S_PTE_TYPE_TABLE		0x1
#define ARM_V7S_PTE_TYPE_PAGE		0x2
#define ARM_V7S_PTE_TYPE_CONT_PAGE	0x1

#define ARM_V7S_PTE_IS_VALID(pte)	(((pte) & 0x3) != 0)
96 97
#define ARM_V7S_PTE_IS_TABLE(pte, lvl) \
	((lvl) == 1 && (((pte) & 0x3) == ARM_V7S_PTE_TYPE_TABLE))
98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125

/* Page table bits */
#define ARM_V7S_ATTR_XN(lvl)		BIT(4 * (2 - (lvl)))
#define ARM_V7S_ATTR_B			BIT(2)
#define ARM_V7S_ATTR_C			BIT(3)
#define ARM_V7S_ATTR_NS_TABLE		BIT(3)
#define ARM_V7S_ATTR_NS_SECTION		BIT(19)

#define ARM_V7S_CONT_SECTION		BIT(18)
#define ARM_V7S_CONT_PAGE_XN_SHIFT	15

/*
 * The attribute bits are consistently ordered*, but occupy bits [17:10] of
 * a level 1 PTE vs. bits [11:4] at level 2. Thus we define the individual
 * fields relative to that 8-bit block, plus a total shift relative to the PTE.
 */
#define ARM_V7S_ATTR_SHIFT(lvl)		(16 - (lvl) * 6)

#define ARM_V7S_ATTR_MASK		0xff
#define ARM_V7S_ATTR_AP0		BIT(0)
#define ARM_V7S_ATTR_AP1		BIT(1)
#define ARM_V7S_ATTR_AP2		BIT(5)
#define ARM_V7S_ATTR_S			BIT(6)
#define ARM_V7S_ATTR_NG			BIT(7)
#define ARM_V7S_TEX_SHIFT		2
#define ARM_V7S_TEX_MASK		0x7
#define ARM_V7S_ATTR_TEX(val)		(((val) & ARM_V7S_TEX_MASK) << ARM_V7S_TEX_SHIFT)

126 127
#define ARM_V7S_ATTR_MTK_4GB		BIT(9) /* MTK extend it for 4GB mode */

128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
/* *well, except for TEX on level 2 large pages, of course :( */
#define ARM_V7S_CONT_PAGE_TEX_SHIFT	6
#define ARM_V7S_CONT_PAGE_TEX_MASK	(ARM_V7S_TEX_MASK << ARM_V7S_CONT_PAGE_TEX_SHIFT)

/* Simplified access permissions */
#define ARM_V7S_PTE_AF			ARM_V7S_ATTR_AP0
#define ARM_V7S_PTE_AP_UNPRIV		ARM_V7S_ATTR_AP1
#define ARM_V7S_PTE_AP_RDONLY		ARM_V7S_ATTR_AP2

/* Register bits */
#define ARM_V7S_RGN_NC			0
#define ARM_V7S_RGN_WBWA		1
#define ARM_V7S_RGN_WT			2
#define ARM_V7S_RGN_WB			3

#define ARM_V7S_PRRR_TYPE_DEVICE	1
#define ARM_V7S_PRRR_TYPE_NORMAL	2
#define ARM_V7S_PRRR_TR(n, type)	(((type) & 0x3) << ((n) * 2))
#define ARM_V7S_PRRR_DS0		BIT(16)
#define ARM_V7S_PRRR_DS1		BIT(17)
#define ARM_V7S_PRRR_NS0		BIT(18)
#define ARM_V7S_PRRR_NS1		BIT(19)
#define ARM_V7S_PRRR_NOS(n)		BIT((n) + 24)

#define ARM_V7S_NMRR_IR(n, attr)	(((attr) & 0x3) << ((n) * 2))
#define ARM_V7S_NMRR_OR(n, attr)	(((attr) & 0x3) << ((n) * 2 + 16))

#define ARM_V7S_TTBR_S			BIT(1)
#define ARM_V7S_TTBR_NOS		BIT(5)
#define ARM_V7S_TTBR_ORGN_ATTR(attr)	(((attr) & 0x3) << 3)
#define ARM_V7S_TTBR_IRGN_ATTR(attr)					\
	((((attr) & 0x1) << 6) | (((attr) & 0x2) >> 1))

#define ARM_V7S_TCR_PD1			BIT(5)

163 164 165 166 167 168 169 170
#ifdef CONFIG_ZONE_DMA32
#define ARM_V7S_TABLE_GFP_DMA GFP_DMA32
#define ARM_V7S_TABLE_SLAB_FLAGS SLAB_CACHE_DMA32
#else
#define ARM_V7S_TABLE_GFP_DMA GFP_DMA
#define ARM_V7S_TABLE_SLAB_FLAGS SLAB_CACHE_DMA
#endif

171 172 173 174 175 176 177 178 179
typedef u32 arm_v7s_iopte;

static bool selftest_running;

struct arm_v7s_io_pgtable {
	struct io_pgtable	iop;

	arm_v7s_iopte		*pgd;
	struct kmem_cache	*l2_tables;
180
	spinlock_t		split_lock;
181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199
};

static dma_addr_t __arm_v7s_dma_addr(void *pages)
{
	return (dma_addr_t)virt_to_phys(pages);
}

static arm_v7s_iopte *iopte_deref(arm_v7s_iopte pte, int lvl)
{
	if (ARM_V7S_PTE_IS_TABLE(pte, lvl))
		pte &= ARM_V7S_TABLE_MASK;
	else
		pte &= ARM_V7S_LVL_MASK(lvl);
	return phys_to_virt(pte);
}

static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp,
				   struct arm_v7s_io_pgtable *data)
{
200 201
	struct io_pgtable_cfg *cfg = &data->iop.cfg;
	struct device *dev = cfg->iommu_dev;
202
	phys_addr_t phys;
203 204 205 206 207
	dma_addr_t dma;
	size_t size = ARM_V7S_TABLE_SIZE(lvl);
	void *table = NULL;

	if (lvl == 1)
208 209
		table = (void *)__get_free_pages(
			__GFP_ZERO | ARM_V7S_TABLE_GFP_DMA, get_order(size));
210
	else if (lvl == 2)
211
		table = kmem_cache_zalloc(data->l2_tables, gfp);
212
	phys = virt_to_phys(table);
213
	if (phys != (arm_v7s_iopte)phys) {
214
		/* Doesn't fit in PTE */
215
		dev_err(dev, "Page table does not fit in PTE: %pa", &phys);
216
		goto out_free;
217
	}
218
	if (table && !(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) {
219 220 221 222 223 224 225 226
		dma = dma_map_single(dev, table, size, DMA_TO_DEVICE);
		if (dma_mapping_error(dev, dma))
			goto out_free;
		/*
		 * We depend on the IOMMU being able to work with any physical
		 * address directly, so if the DMA layer suggests otherwise by
		 * translating or truncating them, that bodes very badly...
		 */
227
		if (dma != phys)
228 229
			goto out_unmap;
	}
230 231
	if (lvl == 2)
		kmemleak_ignore(table);
232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247
	return table;

out_unmap:
	dev_err(dev, "Cannot accommodate DMA translation for IOMMU page tables\n");
	dma_unmap_single(dev, dma, size, DMA_TO_DEVICE);
out_free:
	if (lvl == 1)
		free_pages((unsigned long)table, get_order(size));
	else
		kmem_cache_free(data->l2_tables, table);
	return NULL;
}

static void __arm_v7s_free_table(void *table, int lvl,
				 struct arm_v7s_io_pgtable *data)
{
248 249
	struct io_pgtable_cfg *cfg = &data->iop.cfg;
	struct device *dev = cfg->iommu_dev;
250 251
	size_t size = ARM_V7S_TABLE_SIZE(lvl);

252
	if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA))
253 254 255 256 257 258 259 260 261 262 263
		dma_unmap_single(dev, __arm_v7s_dma_addr(table), size,
				 DMA_TO_DEVICE);
	if (lvl == 1)
		free_pages((unsigned long)table, get_order(size));
	else
		kmem_cache_free(data->l2_tables, table);
}

static void __arm_v7s_pte_sync(arm_v7s_iopte *ptep, int num_entries,
			       struct io_pgtable_cfg *cfg)
{
264
	if (cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)
265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284
		return;

	dma_sync_single_for_device(cfg->iommu_dev, __arm_v7s_dma_addr(ptep),
				   num_entries * sizeof(*ptep), DMA_TO_DEVICE);
}
static void __arm_v7s_set_pte(arm_v7s_iopte *ptep, arm_v7s_iopte pte,
			      int num_entries, struct io_pgtable_cfg *cfg)
{
	int i;

	for (i = 0; i < num_entries; i++)
		ptep[i] = pte;

	__arm_v7s_pte_sync(ptep, num_entries, cfg);
}

static arm_v7s_iopte arm_v7s_prot_to_pte(int prot, int lvl,
					 struct io_pgtable_cfg *cfg)
{
	bool ap = !(cfg->quirks & IO_PGTABLE_QUIRK_NO_PERMS);
285
	arm_v7s_iopte pte = ARM_V7S_ATTR_NG | ARM_V7S_ATTR_S;
286

287 288
	if (!(prot & IOMMU_MMIO))
		pte |= ARM_V7S_ATTR_TEX(1);
289
	if (ap) {
290 291 292
		pte |= ARM_V7S_PTE_AF;
		if (!(prot & IOMMU_PRIV))
			pte |= ARM_V7S_PTE_AP_UNPRIV;
293 294 295 296 297 298 299
		if (!(prot & IOMMU_WRITE))
			pte |= ARM_V7S_PTE_AP_RDONLY;
	}
	pte <<= ARM_V7S_ATTR_SHIFT(lvl);

	if ((prot & IOMMU_NOEXEC) && ap)
		pte |= ARM_V7S_ATTR_XN(lvl);
300 301 302
	if (prot & IOMMU_MMIO)
		pte |= ARM_V7S_ATTR_B;
	else if (prot & IOMMU_CACHE)
303 304
		pte |= ARM_V7S_ATTR_B | ARM_V7S_ATTR_C;

305 306 307 308 309 310 311
	pte |= ARM_V7S_PTE_TYPE_PAGE;
	if (lvl == 1 && (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS))
		pte |= ARM_V7S_ATTR_NS_SECTION;

	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB)
		pte |= ARM_V7S_ATTR_MTK_4GB;

312 313 314 315 316 317
	return pte;
}

static int arm_v7s_pte_to_prot(arm_v7s_iopte pte, int lvl)
{
	int prot = IOMMU_READ;
318
	arm_v7s_iopte attr = pte >> ARM_V7S_ATTR_SHIFT(lvl);
319

320
	if (!(attr & ARM_V7S_PTE_AP_RDONLY))
321
		prot |= IOMMU_WRITE;
322 323
	if (!(attr & ARM_V7S_PTE_AP_UNPRIV))
		prot |= IOMMU_PRIV;
324 325 326
	if ((attr & (ARM_V7S_TEX_MASK << ARM_V7S_TEX_SHIFT)) == 0)
		prot |= IOMMU_MMIO;
	else if (pte & ARM_V7S_ATTR_C)
327
		prot |= IOMMU_CACHE;
328 329
	if (pte & ARM_V7S_ATTR_XN(lvl))
		prot |= IOMMU_NOEXEC;
330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375

	return prot;
}

static arm_v7s_iopte arm_v7s_pte_to_cont(arm_v7s_iopte pte, int lvl)
{
	if (lvl == 1) {
		pte |= ARM_V7S_CONT_SECTION;
	} else if (lvl == 2) {
		arm_v7s_iopte xn = pte & ARM_V7S_ATTR_XN(lvl);
		arm_v7s_iopte tex = pte & ARM_V7S_CONT_PAGE_TEX_MASK;

		pte ^= xn | tex | ARM_V7S_PTE_TYPE_PAGE;
		pte |= (xn << ARM_V7S_CONT_PAGE_XN_SHIFT) |
		       (tex << ARM_V7S_CONT_PAGE_TEX_SHIFT) |
		       ARM_V7S_PTE_TYPE_CONT_PAGE;
	}
	return pte;
}

static arm_v7s_iopte arm_v7s_cont_to_pte(arm_v7s_iopte pte, int lvl)
{
	if (lvl == 1) {
		pte &= ~ARM_V7S_CONT_SECTION;
	} else if (lvl == 2) {
		arm_v7s_iopte xn = pte & BIT(ARM_V7S_CONT_PAGE_XN_SHIFT);
		arm_v7s_iopte tex = pte & (ARM_V7S_CONT_PAGE_TEX_MASK <<
					   ARM_V7S_CONT_PAGE_TEX_SHIFT);

		pte ^= xn | tex | ARM_V7S_PTE_TYPE_CONT_PAGE;
		pte |= (xn >> ARM_V7S_CONT_PAGE_XN_SHIFT) |
		       (tex >> ARM_V7S_CONT_PAGE_TEX_SHIFT) |
		       ARM_V7S_PTE_TYPE_PAGE;
	}
	return pte;
}

static bool arm_v7s_pte_is_cont(arm_v7s_iopte pte, int lvl)
{
	if (lvl == 1 && !ARM_V7S_PTE_IS_TABLE(pte, lvl))
		return pte & ARM_V7S_CONT_SECTION;
	else if (lvl == 2)
		return !(pte & ARM_V7S_PTE_TYPE_PAGE);
	return false;
}

376 377
static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *, unsigned long,
			      size_t, int, arm_v7s_iopte *);
378 379 380 381 382 383

static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data,
			    unsigned long iova, phys_addr_t paddr, int prot,
			    int lvl, int num_entries, arm_v7s_iopte *ptep)
{
	struct io_pgtable_cfg *cfg = &data->iop.cfg;
384
	arm_v7s_iopte pte;
385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405
	int i;

	for (i = 0; i < num_entries; i++)
		if (ARM_V7S_PTE_IS_TABLE(ptep[i], lvl)) {
			/*
			 * We need to unmap and free the old table before
			 * overwriting it with a block entry.
			 */
			arm_v7s_iopte *tblp;
			size_t sz = ARM_V7S_BLOCK_SIZE(lvl);

			tblp = ptep - ARM_V7S_LVL_IDX(iova, lvl);
			if (WARN_ON(__arm_v7s_unmap(data, iova + i * sz,
						    sz, lvl, tblp) != sz))
				return -EINVAL;
		} else if (ptep[i]) {
			/* We require an unmap first */
			WARN_ON(!selftest_running);
			return -EEXIST;
		}

406
	pte = arm_v7s_prot_to_pte(prot, lvl, cfg);
407 408 409 410 411 412 413 414 415
	if (num_entries > 1)
		pte = arm_v7s_pte_to_cont(pte, lvl);

	pte |= paddr & ARM_V7S_LVL_MASK(lvl);

	__arm_v7s_set_pte(ptep, pte, num_entries, cfg);
	return 0;
}

416 417
static arm_v7s_iopte arm_v7s_install_table(arm_v7s_iopte *table,
					   arm_v7s_iopte *ptep,
418
					   arm_v7s_iopte curr,
419 420
					   struct io_pgtable_cfg *cfg)
{
421
	arm_v7s_iopte old, new;
422 423 424 425 426

	new = virt_to_phys(table) | ARM_V7S_PTE_TYPE_TABLE;
	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
		new |= ARM_V7S_ATTR_NS_TABLE;

427 428 429 430 431 432
	/*
	 * Ensure the table itself is visible before its PTE can be.
	 * Whilst we could get away with cmpxchg64_release below, this
	 * doesn't have any ordering semantics when !CONFIG_SMP.
	 */
	dma_wmb();
433 434 435 436 437

	old = cmpxchg_relaxed(ptep, curr, new);
	__arm_v7s_pte_sync(ptep, 1, cfg);

	return old;
438 439
}

440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460
static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova,
			 phys_addr_t paddr, size_t size, int prot,
			 int lvl, arm_v7s_iopte *ptep)
{
	struct io_pgtable_cfg *cfg = &data->iop.cfg;
	arm_v7s_iopte pte, *cptep;
	int num_entries = size >> ARM_V7S_LVL_SHIFT(lvl);

	/* Find our entry at the current level */
	ptep += ARM_V7S_LVL_IDX(iova, lvl);

	/* If we can install a leaf entry at this level, then do so */
	if (num_entries)
		return arm_v7s_init_pte(data, iova, paddr, prot,
					lvl, num_entries, ptep);

	/* We can't allocate tables at the final level */
	if (WARN_ON(lvl == 2))
		return -EINVAL;

	/* Grab a pointer to the next level */
461
	pte = READ_ONCE(*ptep);
462 463 464 465 466
	if (!pte) {
		cptep = __arm_v7s_alloc_table(lvl + 1, GFP_ATOMIC, data);
		if (!cptep)
			return -ENOMEM;

467 468 469
		pte = arm_v7s_install_table(cptep, ptep, 0, cfg);
		if (pte)
			__arm_v7s_free_table(cptep, lvl + 1, data);
470
	} else {
471 472 473 474 475 476 477
		/* We've no easy way of knowing if it's synced yet, so... */
		__arm_v7s_pte_sync(ptep, 1, cfg);
	}

	if (ARM_V7S_PTE_IS_TABLE(pte, lvl)) {
		cptep = iopte_deref(pte, lvl);
	} else if (pte) {
478 479 480
		/* We require an unmap first */
		WARN_ON(!selftest_running);
		return -EEXIST;
481 482 483 484 485 486 487 488 489 490
	}

	/* Rinse, repeat */
	return __arm_v7s_map(data, iova, paddr, size, prot, lvl + 1, cptep);
}

static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
			phys_addr_t paddr, size_t size, int prot)
{
	struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
491
	struct io_pgtable *iop = &data->iop;
492 493 494 495 496 497
	int ret;

	/* If no access, then nothing to do */
	if (!(prot & (IOMMU_READ | IOMMU_WRITE)))
		return 0;

498 499 500
	if (WARN_ON(upper_32_bits(iova) || upper_32_bits(paddr)))
		return -ERANGE;

501 502 503 504 505
	ret = __arm_v7s_map(data, iova, paddr, size, prot, 1, data->pgd);
	/*
	 * Synchronise all PTE updates for the new mapping before there's
	 * a chance for anything to kick off a table walk for the new iova.
	 */
506 507 508 509
	if (iop->cfg.quirks & IO_PGTABLE_QUIRK_TLBI_ON_MAP) {
		io_pgtable_tlb_add_flush(iop, iova, size,
					 ARM_V7S_BLOCK_SIZE(2), false);
		io_pgtable_tlb_sync(iop);
510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532
	} else {
		wmb();
	}

	return ret;
}

static void arm_v7s_free_pgtable(struct io_pgtable *iop)
{
	struct arm_v7s_io_pgtable *data = io_pgtable_to_data(iop);
	int i;

	for (i = 0; i < ARM_V7S_PTES_PER_LVL(1); i++) {
		arm_v7s_iopte pte = data->pgd[i];

		if (ARM_V7S_PTE_IS_TABLE(pte, 1))
			__arm_v7s_free_table(iopte_deref(pte, 1), 2, data);
	}
	__arm_v7s_free_table(data->pgd, 1, data);
	kmem_cache_destroy(data->l2_tables);
	kfree(data);
}

533 534 535
static arm_v7s_iopte arm_v7s_split_cont(struct arm_v7s_io_pgtable *data,
					unsigned long iova, int idx, int lvl,
					arm_v7s_iopte *ptep)
536
{
537
	struct io_pgtable *iop = &data->iop;
538 539 540 541
	arm_v7s_iopte pte;
	size_t size = ARM_V7S_BLOCK_SIZE(lvl);
	int i;

542 543 544 545 546
	/* Check that we didn't lose a race to get the lock */
	pte = *ptep;
	if (!arm_v7s_pte_is_cont(pte, lvl))
		return pte;

547
	ptep -= idx & (ARM_V7S_CONT_PAGES - 1);
548 549 550
	pte = arm_v7s_cont_to_pte(pte, lvl);
	for (i = 0; i < ARM_V7S_CONT_PAGES; i++)
		ptep[i] = pte + i * size;
551

552
	__arm_v7s_pte_sync(ptep, ARM_V7S_CONT_PAGES, &iop->cfg);
553 554

	size *= ARM_V7S_CONT_PAGES;
555 556
	io_pgtable_tlb_add_flush(iop, iova, size, size, true);
	io_pgtable_tlb_sync(iop);
557
	return pte;
558 559
}

560 561 562 563
static size_t arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data,
				      unsigned long iova, size_t size,
				      arm_v7s_iopte blk_pte,
				      arm_v7s_iopte *ptep)
564
{
565 566 567 568 569 570 571
	struct io_pgtable_cfg *cfg = &data->iop.cfg;
	arm_v7s_iopte pte, *tablep;
	int i, unmap_idx, num_entries, num_ptes;

	tablep = __arm_v7s_alloc_table(2, GFP_ATOMIC, data);
	if (!tablep)
		return 0; /* Bytes unmapped */
572

573 574 575
	num_ptes = ARM_V7S_PTES_PER_LVL(2);
	num_entries = size >> ARM_V7S_LVL_SHIFT(2);
	unmap_idx = ARM_V7S_LVL_IDX(iova, 2);
576

577 578 579
	pte = arm_v7s_prot_to_pte(arm_v7s_pte_to_prot(blk_pte, 1), 2, cfg);
	if (num_entries > 1)
		pte = arm_v7s_pte_to_cont(pte, 2);
580

581
	for (i = 0; i < num_ptes; i += num_entries, pte += size) {
582
		/* Unmap! */
583
		if (i == unmap_idx)
584 585
			continue;

586
		__arm_v7s_set_pte(&tablep[i], pte, num_entries, cfg);
587 588
	}

589 590 591 592 593 594 595 596 597 598
	pte = arm_v7s_install_table(tablep, ptep, blk_pte, cfg);
	if (pte != blk_pte) {
		__arm_v7s_free_table(tablep, 2, data);

		if (!ARM_V7S_PTE_IS_TABLE(pte, 1))
			return 0;

		tablep = iopte_deref(pte, 1);
		return __arm_v7s_unmap(data, iova, size, 2, tablep);
	}
599 600

	io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true);
601
	io_pgtable_tlb_sync(&data->iop);
602 603 604
	return size;
}

605 606 607
static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data,
			      unsigned long iova, size_t size, int lvl,
			      arm_v7s_iopte *ptep)
608 609
{
	arm_v7s_iopte pte[ARM_V7S_CONT_PAGES];
610
	struct io_pgtable *iop = &data->iop;
611 612 613 614 615 616 617 618 619
	int idx, i = 0, num_entries = size >> ARM_V7S_LVL_SHIFT(lvl);

	/* Something went horribly wrong and we ran out of page table */
	if (WARN_ON(lvl > 2))
		return 0;

	idx = ARM_V7S_LVL_IDX(iova, lvl);
	ptep += idx;
	do {
620 621
		pte[i] = READ_ONCE(ptep[i]);
		if (WARN_ON(!ARM_V7S_PTE_IS_VALID(pte[i])))
622 623 624 625 626 627
			return 0;
	} while (++i < num_entries);

	/*
	 * If we've hit a contiguous 'large page' entry at this level, it
	 * needs splitting first, unless we're unmapping the whole lot.
628 629 630 631 632 633
	 *
	 * For splitting, we can't rewrite 16 PTEs atomically, and since we
	 * can't necessarily assume TEX remap we don't have a software bit to
	 * mark live entries being split. In practice (i.e. DMA API code), we
	 * will never be splitting large pages anyway, so just wrap this edge
	 * case in a lock for the sake of correctness and be done with it.
634
	 */
635 636 637 638 639 640 641
	if (num_entries <= 1 && arm_v7s_pte_is_cont(pte[0], lvl)) {
		unsigned long flags;

		spin_lock_irqsave(&data->split_lock, flags);
		pte[0] = arm_v7s_split_cont(data, iova, idx, lvl, ptep);
		spin_unlock_irqrestore(&data->split_lock, flags);
	}
642 643 644 645 646

	/* If the size matches this level, we're in the right place */
	if (num_entries) {
		size_t blk_size = ARM_V7S_BLOCK_SIZE(lvl);

647
		__arm_v7s_set_pte(ptep, 0, num_entries, &iop->cfg);
648 649 650 651

		for (i = 0; i < num_entries; i++) {
			if (ARM_V7S_PTE_IS_TABLE(pte[i], lvl)) {
				/* Also flush any partial walks */
652 653 654
				io_pgtable_tlb_add_flush(iop, iova, blk_size,
					ARM_V7S_BLOCK_SIZE(lvl + 1), false);
				io_pgtable_tlb_sync(iop);
655 656
				ptep = iopte_deref(pte[i], lvl);
				__arm_v7s_free_table(ptep, lvl + 1, data);
657 658 659 660 661 662 663
			} else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) {
				/*
				 * Order the PTE update against queueing the IOVA, to
				 * guarantee that a flush callback from a different CPU
				 * has observed it before the TLBIALL can be issued.
				 */
				smp_wmb();
664
			} else {
665 666
				io_pgtable_tlb_add_flush(iop, iova, blk_size,
							 blk_size, true);
667 668 669 670 671 672 673 674 675
			}
			iova += blk_size;
		}
		return size;
	} else if (lvl == 1 && !ARM_V7S_PTE_IS_TABLE(pte[0], lvl)) {
		/*
		 * Insert a table at the next level to map the old region,
		 * minus the part we want to unmap
		 */
676
		return arm_v7s_split_blk_unmap(data, iova, size, pte[0], ptep);
677 678 679 680 681 682 683
	}

	/* Keep on walkin' */
	ptep = iopte_deref(pte[0], lvl);
	return __arm_v7s_unmap(data, iova, size, lvl + 1, ptep);
}

684 685
static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova,
			    size_t size)
686 687 688
{
	struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);

689 690 691
	if (WARN_ON(upper_32_bits(iova)))
		return 0;

692
	return __arm_v7s_unmap(data, iova, size, 1, data->pgd);
693 694 695 696 697 698 699 700 701 702 703
}

static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops,
					unsigned long iova)
{
	struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops);
	arm_v7s_iopte *ptep = data->pgd, pte;
	int lvl = 0;
	u32 mask;

	do {
704 705
		ptep += ARM_V7S_LVL_IDX(iova, ++lvl);
		pte = READ_ONCE(*ptep);
706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725
		ptep = iopte_deref(pte, lvl);
	} while (ARM_V7S_PTE_IS_TABLE(pte, lvl));

	if (!ARM_V7S_PTE_IS_VALID(pte))
		return 0;

	mask = ARM_V7S_LVL_MASK(lvl);
	if (arm_v7s_pte_is_cont(pte, lvl))
		mask *= ARM_V7S_CONT_PAGES;
	return (pte & mask) | (iova & ~mask);
}

static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
						void *cookie)
{
	struct arm_v7s_io_pgtable *data;

	if (cfg->ias > ARM_V7S_ADDR_BITS || cfg->oas > ARM_V7S_ADDR_BITS)
		return NULL;

726 727
	if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
			    IO_PGTABLE_QUIRK_NO_PERMS |
728
			    IO_PGTABLE_QUIRK_TLBI_ON_MAP |
729
			    IO_PGTABLE_QUIRK_ARM_MTK_4GB |
730 731
			    IO_PGTABLE_QUIRK_NO_DMA |
			    IO_PGTABLE_QUIRK_NON_STRICT))
732 733
		return NULL;

734 735 736 737 738
	/* If ARM_MTK_4GB is enabled, the NO_PERMS is also expected. */
	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB &&
	    !(cfg->quirks & IO_PGTABLE_QUIRK_NO_PERMS))
			return NULL;

739 740 741 742
	data = kmalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		return NULL;

743
	spin_lock_init(&data->split_lock);
744 745 746
	data->l2_tables = kmem_cache_create("io-pgtable_armv7s_l2",
					    ARM_V7S_TABLE_SIZE(2),
					    ARM_V7S_TABLE_SIZE(2),
747
					    ARM_V7S_TABLE_SLAB_FLAGS, NULL);
748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829
	if (!data->l2_tables)
		goto out_free_data;

	data->iop.ops = (struct io_pgtable_ops) {
		.map		= arm_v7s_map,
		.unmap		= arm_v7s_unmap,
		.iova_to_phys	= arm_v7s_iova_to_phys,
	};

	/* We have to do this early for __arm_v7s_alloc_table to work... */
	data->iop.cfg = *cfg;

	/*
	 * Unless the IOMMU driver indicates supersection support by
	 * having SZ_16M set in the initial bitmap, they won't be used.
	 */
	cfg->pgsize_bitmap &= SZ_4K | SZ_64K | SZ_1M | SZ_16M;

	/* TCR: T0SZ=0, disable TTBR1 */
	cfg->arm_v7s_cfg.tcr = ARM_V7S_TCR_PD1;

	/*
	 * TEX remap: the indices used map to the closest equivalent types
	 * under the non-TEX-remap interpretation of those attribute bits,
	 * excepting various implementation-defined aspects of shareability.
	 */
	cfg->arm_v7s_cfg.prrr = ARM_V7S_PRRR_TR(1, ARM_V7S_PRRR_TYPE_DEVICE) |
				ARM_V7S_PRRR_TR(4, ARM_V7S_PRRR_TYPE_NORMAL) |
				ARM_V7S_PRRR_TR(7, ARM_V7S_PRRR_TYPE_NORMAL) |
				ARM_V7S_PRRR_DS0 | ARM_V7S_PRRR_DS1 |
				ARM_V7S_PRRR_NS1 | ARM_V7S_PRRR_NOS(7);
	cfg->arm_v7s_cfg.nmrr = ARM_V7S_NMRR_IR(7, ARM_V7S_RGN_WBWA) |
				ARM_V7S_NMRR_OR(7, ARM_V7S_RGN_WBWA);

	/* Looking good; allocate a pgd */
	data->pgd = __arm_v7s_alloc_table(1, GFP_KERNEL, data);
	if (!data->pgd)
		goto out_free_data;

	/* Ensure the empty pgd is visible before any actual TTBR write */
	wmb();

	/* TTBRs */
	cfg->arm_v7s_cfg.ttbr[0] = virt_to_phys(data->pgd) |
				   ARM_V7S_TTBR_S | ARM_V7S_TTBR_NOS |
				   ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_WBWA) |
				   ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_WBWA);
	cfg->arm_v7s_cfg.ttbr[1] = 0;
	return &data->iop;

out_free_data:
	kmem_cache_destroy(data->l2_tables);
	kfree(data);
	return NULL;
}

struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns = {
	.alloc	= arm_v7s_alloc_pgtable,
	.free	= arm_v7s_free_pgtable,
};

#ifdef CONFIG_IOMMU_IO_PGTABLE_ARMV7S_SELFTEST

static struct io_pgtable_cfg *cfg_cookie;

static void dummy_tlb_flush_all(void *cookie)
{
	WARN_ON(cookie != cfg_cookie);
}

static void dummy_tlb_add_flush(unsigned long iova, size_t size,
				size_t granule, bool leaf, void *cookie)
{
	WARN_ON(cookie != cfg_cookie);
	WARN_ON(!(size & cfg_cookie->pgsize_bitmap));
}

static void dummy_tlb_sync(void *cookie)
{
	WARN_ON(cookie != cfg_cookie);
}

830
static const struct iommu_gather_ops dummy_tlb_ops = {
831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848
	.tlb_flush_all	= dummy_tlb_flush_all,
	.tlb_add_flush	= dummy_tlb_add_flush,
	.tlb_sync	= dummy_tlb_sync,
};

#define __FAIL(ops)	({				\
		WARN(1, "selftest: test failed\n");	\
		selftest_running = false;		\
		-EFAULT;				\
})

static int __init arm_v7s_do_selftests(void)
{
	struct io_pgtable_ops *ops;
	struct io_pgtable_cfg cfg = {
		.tlb = &dummy_tlb_ops,
		.oas = 32,
		.ias = 32,
849
		.quirks = IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA,
850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881
		.pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M,
	};
	unsigned int iova, size, iova_start;
	unsigned int i, loopnr = 0;

	selftest_running = true;

	cfg_cookie = &cfg;

	ops = alloc_io_pgtable_ops(ARM_V7S, &cfg, &cfg);
	if (!ops) {
		pr_err("selftest: failed to allocate io pgtable ops\n");
		return -EINVAL;
	}

	/*
	 * Initial sanity checks.
	 * Empty page tables shouldn't provide any translations.
	 */
	if (ops->iova_to_phys(ops, 42))
		return __FAIL(ops);

	if (ops->iova_to_phys(ops, SZ_1G + 42))
		return __FAIL(ops);

	if (ops->iova_to_phys(ops, SZ_2G + 42))
		return __FAIL(ops);

	/*
	 * Distinct mappings of different granule sizes.
	 */
	iova = 0;
882
	for_each_set_bit(i, &cfg.pgsize_bitmap, BITS_PER_LONG) {
883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921
		size = 1UL << i;
		if (ops->map(ops, iova, iova, size, IOMMU_READ |
						    IOMMU_WRITE |
						    IOMMU_NOEXEC |
						    IOMMU_CACHE))
			return __FAIL(ops);

		/* Overlapping mappings */
		if (!ops->map(ops, iova, iova + size, size,
			      IOMMU_READ | IOMMU_NOEXEC))
			return __FAIL(ops);

		if (ops->iova_to_phys(ops, iova + 42) != (iova + 42))
			return __FAIL(ops);

		iova += SZ_16M;
		loopnr++;
	}

	/* Partial unmap */
	i = 1;
	size = 1UL << __ffs(cfg.pgsize_bitmap);
	while (i < loopnr) {
		iova_start = i * SZ_16M;
		if (ops->unmap(ops, iova_start + size, size) != size)
			return __FAIL(ops);

		/* Remap of partial unmap */
		if (ops->map(ops, iova_start + size, size, size, IOMMU_READ))
			return __FAIL(ops);

		if (ops->iova_to_phys(ops, iova_start + size + 42)
		    != (size + 42))
			return __FAIL(ops);
		i++;
	}

	/* Full unmap */
	iova = 0;
922
	for_each_set_bit(i, &cfg.pgsize_bitmap, BITS_PER_LONG) {
923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949
		size = 1UL << i;

		if (ops->unmap(ops, iova, size) != size)
			return __FAIL(ops);

		if (ops->iova_to_phys(ops, iova + 42))
			return __FAIL(ops);

		/* Remap full block */
		if (ops->map(ops, iova, iova, size, IOMMU_WRITE))
			return __FAIL(ops);

		if (ops->iova_to_phys(ops, iova + 42) != (iova + 42))
			return __FAIL(ops);

		iova += SZ_16M;
	}

	free_io_pgtable_ops(ops);

	selftest_running = false;

	pr_info("self test ok\n");
	return 0;
}
subsys_initcall(arm_v7s_do_selftests);
#endif